From 65a554bc65f21b84f1785f66e1155be8a3aa9159 Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Mon, 27 Apr 2026 13:25:54 -0400 Subject: [PATCH 1/4] add back deleted files --- .clang-format | 229 + .gitignore | 10 +- ocean/impulse_wars/CMakeLists.txt | 138 + ocean/impulse_wars/Makefile | 61 + ocean/impulse_wars/README.md | 12 + ocean/impulse_wars/binding.h | 177 + ocean/impulse_wars/helpers.h | 4 +- ocean/impulse_wars/impulse_wars.py | 181 + ocean/impulse_wars/pyproject.toml | 25 + ocean/impulse_wars/types.h | 2 +- vendor/cc_array.h | 1410 +++++++ vendor/cc_common.h | 75 + vendor/dlmalloc.h | 6264 ++++++++++++++++++++++++++++ vendor/rlights.h | 2 +- 14 files changed, 8581 insertions(+), 9 deletions(-) create mode 100644 .clang-format create mode 100644 ocean/impulse_wars/CMakeLists.txt create mode 100644 ocean/impulse_wars/Makefile create mode 100644 ocean/impulse_wars/README.md create mode 100644 ocean/impulse_wars/binding.h create mode 100644 ocean/impulse_wars/impulse_wars.py create mode 100644 ocean/impulse_wars/pyproject.toml create mode 100644 vendor/cc_array.h create mode 100644 vendor/cc_common.h create mode 100644 vendor/dlmalloc.h diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..d9ba19d3de --- /dev/null +++ b/.clang-format @@ -0,0 +1,229 @@ +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: BlockIndent +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: false +BinPackParameters: false +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true 
+BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeClosingBracket: Always +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Custom +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 0 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: false +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: true +InsertNewlineAtEOF: false +InsertTrailingCommas: Wrapped +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: Never +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true 
+SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... + diff --git a/.gitignore b/.gitignore index b01c266e47..5b31358da0 100644 --- a/.gitignore +++ b/.gitignore @@ -162,11 +162,11 @@ raylib*/ box2d*/ # Temp Impulse Wars files -pufferlib/ocean/impulse_wars/*-debug/ -pufferlib/ocean/impulse_wars/*-release/ -pufferlib/ocean/impulse_wars/debug-*/ -pufferlib/ocean/impulse_wars/release-*/ -pufferlib/ocean/impulse_wars/benchmark/ +ocean/impulse_wars/*-debug/ +ocean/impulse_wars/*-release/ +ocean/impulse_wars/debug-*/ +ocean/impulse_wars/release-*/ +ocean/impulse_wars/benchmark/ # Data resources/drive/data/* diff --git a/ocean/impulse_wars/CMakeLists.txt b/ocean/impulse_wars/CMakeLists.txt new file mode 100644 index 0000000000..55f49a73f6 --- /dev/null +++ b/ocean/impulse_wars/CMakeLists.txt @@ -0,0 +1,138 @@ +# 3.22 was released on Nov 2021, should be widely available +cmake_minimum_required(VERSION 3.22) +include(FetchContent) + +project( + impulse-wars + DESCRIPTION "Impulse Wars" + LANGUAGES C +) + +message(INFO " C Compiler: ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} ${CMAKE_C_COMPILER_ID}") + +# use ccache if available to speed up subsequent builds +find_program(CCACHE_FOUND "ccache") +if(CCACHE_FOUND) + set(CMAKE_C_COMPILER_LAUNCHER "ccache") +endif() + +# enable some C23 features, the c2x standard is a WIP standard supported +# by gcc since 9 (May 2019) and clang since 9 (Sep 2019) +set(CMAKE_C_FLAGS_INIT " -std=c2x") + +# force position independent code everywhere to prevent some rare +# linker errors depending on what compiler is used +add_compile_options("-fPIC") + +if(CMAKE_BUILD_TYPE MATCHES Debug) + # leak detection doesn't work correctly when the code is called by + # Python, so disable it + if(DEFINED BUILD_PYTHON_MODULE) + add_compile_options("-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow") + add_link_options("-shared-libasan" "-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow") + else() + add_compile_options("-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow,leak") + add_link_options("-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow,leak") + endif() + + # mold is an extremely fast linker, use it if available + # only use mold in debug mode, link time optimization currently doesn't + # work with mold and provides large speedups + find_program(MOLD_FOUND "mold") + if(MOLD_FOUND) + add_link_options("-fuse-ld=mold") + endif() +else() + add_compile_options("-flto" "-fno-math-errno") + if (NOT DEFINED EMSCRIPTEN) + # emscripten doesn't support -march=native, it doesn't make sense + # for WASM anyway + add_compile_options("-march=native") + else() + # tell emscripten to generate an HTML file that can be used to + # test the WASM, and ensure necessary code is transformed to be + # async friendly; it allows the game to be run much more smoothly + set(CMAKE_EXECUTABLE_SUFFIX ".html") + add_link_options("-sASYNCIFY") + endif() + # ensure the linker used is from the same compiler toolchain, 
or else
+    # link time optimization will probably fail; if we're using
+    # emscripten it will use its own linker
+    if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT DEFINED EMSCRIPTEN)
+        add_link_options("-fuse-ld=lld")
+    endif()
+
+    # add_compile_options("-pg")
+    # add_link_options("-pg")
+endif()
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+set(FETCHCONTENT_QUIET FALSE)
+
+# fetch and configure dependencies
+FetchContent_Declare(
+    raylib
+    URL https://github.com/raysan5/raylib/archive/c1ab645ca298a2801097931d1079b10ff7eb9df8.zip # 5.5
+)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Statically link raylib" FORCE)
+set(WITH_PIC ON CACHE BOOL "Compile static library as position-independent code" FORCE)
+set(CUSTOMIZE_BUILD ON CACHE BOOL "Customize raylib build settings" FORCE)
+set(USE_AUDIO OFF CACHE BOOL "Don't build unused audio module" FORCE)
+FetchContent_MakeAvailable(raylib)
+
+# if box2d is fetched first, installing the built Python module will fail
+# for reasons unbeknownst to mere mortals
+# maybe due to install prefix shenanigans?
+FetchContent_Declare(
+    box2d
+    URL https://github.com/capnspacehook/box2d/archive/df25d747be0ab2fd9425eece022d2ec897c2028d.zip
+)
+set(BOX2D_ENABLE_SIMD ON CACHE BOOL "Enable SIMD math (faster)" FORCE)
+set(BOX2D_AVX2 ON CACHE BOOL "Enable AVX2 (faster)" FORCE)
+add_compile_definitions(B2_MAX_WORLDS=65534)
+FetchContent_MakeAvailable(box2d)
+# this is set to off by box2d to enable cross platform determinism, but
+# I don't care about that and want the small speedup instead
+target_compile_options(box2d PRIVATE "-ffp-contract=fast")
+
+function(configure_target target_name)
+    target_include_directories(
+        ${target_name} PRIVATE
+        "${CMAKE_CURRENT_SOURCE_DIR}"
+        "${CMAKE_CURRENT_SOURCE_DIR}/../../vendor"
+    )
+
+    # Mark box2d as a system include directory to suppress warnings from it
+    target_include_directories(${target_name} SYSTEM PRIVATE "${box2d_SOURCE_DIR}/src")
+
+    target_link_libraries(${target_name} PRIVATE raylib box2d)
+
+    target_compile_options(${target_name} PRIVATE
+        "-Werror" "-Wall" "-Wextra" "-Wpedantic"
+        "-Wno-implicit-fallthrough" "-Wno-variadic-macros" "-Wno-strict-prototypes" "-Wno-gnu-statement-expression"
+    )
+endfunction()
+
+if(DEFINED BUILD_PYTHON_MODULE)
+    find_package(
+        Python
+        COMPONENTS Interpreter Development.Module NumPy
+        REQUIRED
+    )
+
+    python_add_library(binding MODULE binding.c WITH_SOABI)
+
+    target_include_directories(binding PRIVATE
+        ${Python_NumPy_INCLUDE_DIRS}
+    )
+
+    configure_target(binding)
+
+    install(TARGETS binding DESTINATION .)
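+
+# standalone C builds of the demo and benchmark; these are what the
+# Makefile's debug-demo/release-demo and benchmark targets build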
+elseif(DEFINED BUILD_DEMO) + add_executable(demo "${CMAKE_CURRENT_SOURCE_DIR}/impulse_wars.c") + configure_target(demo) +elseif(DEFINED BUILD_BENCHMARK) + add_executable(benchmark "${CMAKE_CURRENT_SOURCE_DIR}/benchmark.c") + configure_target(benchmark) +endif() diff --git a/ocean/impulse_wars/Makefile b/ocean/impulse_wars/Makefile new file mode 100644 index 0000000000..ce593669da --- /dev/null +++ b/ocean/impulse_wars/Makefile @@ -0,0 +1,61 @@ +RELEASE_PYTHON_MODULE_DIR := python-module-release +DEBUG_PYTHON_MODULE_DIR := python-module-debug +DEBUG_DIR := debug-demo +RELEASE_DIR := release-demo +RELEASE_WEB_DIR := release-demo-web +BENCHMARK_DIR := benchmark + +DEBUG_BUILD_TYPE := Debug +RELEASE_BUILD_TYPE := Release + +# install build dependencies if this is a fresh build, Python won't +# install build dependencies when --no-build-isolation is passed +# build with no isolation so that builds can be cached and/or incremental + +# build Python module in release mode +.PHONY: python-module-release +python-module-release: + @test -d $(RELEASE_PYTHON_MODULE_DIR) || pip install scikit-build-core autopxd2 cython + @pip install --no-build-isolation --config-settings=editable.rebuild=true -Cbuild-dir=$(RELEASE_PYTHON_MODULE_DIR) -v . + +# build Python module in debug mode +.PHONY: python-module-debug +python-module-debug: + @test -d $(DEBUG_PYTHON_MODULE_DIR) || pip install scikit-build-core autopxd2 cython + @pip install --no-build-isolation --config-settings=editable.rebuild=true --config-settings=cmake.build-type="Debug" -Cbuild-dir=$(DEBUG_PYTHON_MODULE_DIR) -v . + +# build C demo in debug mode +.PHONY: debug-demo +debug-demo: + @mkdir -p $(DEBUG_DIR) + @cd $(DEBUG_DIR) && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=$(DEBUG_BUILD_TYPE) -DBUILD_DEMO=true -DCMAKE_C_COMPILER=clang-20 .. && \ + cmake --build . + +# build C demo in release mode +.PHONY: release-demo +release-demo: + @mkdir -p $(RELEASE_DIR) + @cd $(RELEASE_DIR) && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=$(RELEASE_BUILD_TYPE) -DBUILD_DEMO=true -DCMAKE_C_COMPILER=clang-20 .. && \ + cmake --build . + +# build C demo in release mode for web +.PHONY: release-demo-web +release-demo-web: + @mkdir -p $(RELEASE_WEB_DIR) + @cd $(RELEASE_WEB_DIR) && \ + emcmake cmake -GNinja -DCMAKE_BUILD_TYPE=$(RELEASE_BUILD_TYPE) -DPLATFORM=Web -DBUILD_DEMO=true .. && \ + cmake --build . + +# build C benchmark +.PHONY: benchmark +benchmark: + @mkdir -p $(BENCHMARK_DIR) + @cd $(BENCHMARK_DIR) && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=$(RELEASE_BUILD_TYPE) -DBUILD_BENCHMARK=true -DCMAKE_C_COMPILER=clang-20 .. && \ + cmake --build . + +.PHONY: clean +clean: + @rm -rf build $(RELEASE_PYTHON_MODULE_DIR) $(DEBUG_PYTHON_MODULE_DIR) $(DEBUG_DIR) $(RELEASE_DIR) $(RELEASE_WEB_DIR) $(BENCHMARK_DIR) diff --git a/ocean/impulse_wars/README.md b/ocean/impulse_wars/README.md new file mode 100644 index 0000000000..accca74381 --- /dev/null +++ b/ocean/impulse_wars/README.md @@ -0,0 +1,12 @@ +# Impulse Wars + +To build, you need to have the following: +- cmake +- make +- ninja +- raylib required deps installed: https://github.com/raysan5/raylib/wiki/Working-on-GNU-Linux + +Run `make && cp python-module-release/binding.*.so .` to build the python module in release mode. +`puffer_impulse_wars` env should now be trainable. + +When watching evaluations, you need to set all instances of `is_training = False` and `render = True` in the config file. 
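+
+For example, a typical release build looks like this (a sketch; `make` defaults to the `python-module-release` target, and the paths assume the commands are run from `ocean/impulse_wars/`):
+
+```sh
+# build the Python module in release mode
+make
+# copy the built extension next to the env code so Python can import it
+cp python-module-release/binding.*.so .
+```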
diff --git a/ocean/impulse_wars/binding.h b/ocean/impulse_wars/binding.h
new file mode 100644
index 0000000000..28b429773b
--- /dev/null
+++ b/ocean/impulse_wars/binding.h
@@ -0,0 +1,177 @@
+#include <Python.h>
+
+#include "env.h"
+
+static PyObject *get_consts(PyObject *self, PyObject *args);
+
+#define Env iwEnv
+#define MY_SHARED
+#define MY_METHODS {"get_consts", get_consts, METH_VARARGS, "Get constants"}
+
+#include "../env_binding.h"
+
+// set a key in a Python dict, bailing out with an exception set on failure
+#define setDictVal(dict, key, val) \
+    if (PyDict_SetItemString(dict, key, PyLong_FromLong(val)) < 0) { \
+        PyErr_SetString(PyExc_RuntimeError, "Failed to set " key " in dict"); \
+        return NULL; \
+    }
+
+static PyObject *get_consts(PyObject *self, PyObject *args) {
+    PyObject *dronesArg = PyTuple_GetItem(args, 0);
+    if (!PyObject_TypeCheck(dronesArg, &PyLong_Type)) {
+        PyErr_SetString(PyExc_TypeError, "num_drones must be an integer");
+        return NULL;
+    }
+    const uint8_t numDrones = (uint8_t)PyLong_AsLong(dronesArg);
+
+    PyObject *dict = PyDict_New();
+    if (PyErr_Occurred()) {
+        return NULL;
+    }
+
+    const uint16_t droneObsOffset = ENEMY_DRONE_OBS_OFFSET + ((numDrones - 1) * ENEMY_DRONE_OBS_SIZE);
+
+    setDictVal(dict, "obsBytes", obsBytes(numDrones));
+    setDictVal(dict, "mapObsSize", MAP_OBS_SIZE);
+    setDictVal(dict, "discreteObsSize", discreteObsSize(numDrones));
+    setDictVal(dict, "continuousObsSize", continuousObsSize(numDrones));
+    setDictVal(dict, "continuousObsBytes", continuousObsSize(numDrones) * sizeof(float));
+    setDictVal(dict, "wallTypes", NUM_WALL_TYPES);
+    setDictVal(dict, "weaponTypes", NUM_WEAPONS + 1);
+    setDictVal(dict, "mapObsRows", MAP_OBS_ROWS);
+    setDictVal(dict, "mapObsColumns", MAP_OBS_COLUMNS);
+    setDictVal(dict, "continuousObsOffset", alignedSize(MAP_OBS_SIZE, sizeof(float)));
+    setDictVal(dict, "numNearWallObs", NUM_NEAR_WALL_OBS);
+    setDictVal(dict, "nearWallTypesObsOffset", NEAR_WALL_TYPES_OBS_OFFSET);
+    setDictVal(dict, "nearWallPosObsSize", NEAR_WALL_POS_OBS_SIZE);
+    setDictVal(dict, "nearWallObsSize", NEAR_WALL_OBS_SIZE);
+    setDictVal(dict, "nearWallPosObsOffset", NEAR_WALL_POS_OBS_OFFSET);
+    setDictVal(dict, "numFloatingWallObs", NUM_FLOATING_WALL_OBS);
+    setDictVal(dict, "floatingWallTypesObsOffset", FLOATING_WALL_TYPES_OBS_OFFSET);
+    setDictVal(dict, "floatingWallInfoObsSize", FLOATING_WALL_INFO_OBS_SIZE);
+    setDictVal(dict, "floatingWallObsSize", FLOATING_WALL_OBS_SIZE);
+    setDictVal(dict, "floatingWallInfoObsOffset", FLOATING_WALL_INFO_OBS_OFFSET);
+    setDictVal(dict, "numWeaponPickupObs", NUM_WEAPON_PICKUP_OBS);
+    setDictVal(dict, "weaponPickupTypesObsOffset", WEAPON_PICKUP_WEAPONS_OBS_OFFSET);
+    setDictVal(dict, "weaponPickupPosObsSize", WEAPON_PICKUP_POS_OBS_SIZE);
+    setDictVal(dict, "weaponPickupObsSize", WEAPON_PICKUP_OBS_SIZE);
+    setDictVal(dict, "weaponPickupPosObsOffset", WEAPON_PICKUP_POS_OBS_OFFSET);
+    setDictVal(dict, "numProjectileObs", NUM_PROJECTILE_OBS);
+    setDictVal(dict, "projectileDroneObsOffset", PROJECTILE_DRONE_OBS_OFFSET);
+    setDictVal(dict, "projectileTypesObsOffset", PROJECTILE_WEAPONS_OBS_OFFSET);
+    setDictVal(dict, "projectileInfoObsSize", PROJECTILE_INFO_OBS_SIZE);
+    setDictVal(dict, "projectileObsSize", PROJECTILE_OBS_SIZE);
+    setDictVal(dict, "projectileInfoObsOffset", PROJECTILE_INFO_OBS_OFFSET);
+    setDictVal(dict, "enemyDroneWeaponsObsOffset", ENEMY_DRONE_WEAPONS_OBS_OFFSET);
+    setDictVal(dict, "enemyDroneObsOffset", ENEMY_DRONE_OBS_OFFSET);
+    setDictVal(dict, "enemyDroneObsSize", ENEMY_DRONE_OBS_SIZE);
+    setDictVal(dict, "droneObsOffset", droneObsOffset);
+    setDictVal(dict, "droneObsSize", 
DRONE_OBS_SIZE); + setDictVal(dict, "miscObsSize", MISC_OBS_SIZE); + setDictVal(dict, "miscObsOffset", droneObsOffset + DRONE_OBS_SIZE); + + setDictVal(dict, "maxDrones", MAX_DRONES); + setDictVal(dict, "contActionsSize", CONTINUOUS_ACTION_SIZE); + + return dict; +} + +static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { + VecEnv *ve = unpack_vecenv(args); + initMaps(ve->envs[0]); + + for (uint16_t i = 0; i < ve->num_envs; i++) { + iwEnv *e = (iwEnv *)ve->envs[i]; + setupEnv(e); + } + + return Py_None; +} + +static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { + initEnv( + e, + (uint8_t)unpack(kwargs, "num_drones"), + (uint8_t)unpack(kwargs, "num_agents"), + (int8_t)unpack(kwargs, "map_idx"), + (uint64_t)unpack(kwargs, "seed"), + (bool)unpack(kwargs, "enable_teams"), + (bool)unpack(kwargs, "sitting_duck"), + (bool)unpack(kwargs, "is_training"), + (bool)unpack(kwargs, "continuous") + ); + setRewards( + e, + (float)unpack(kwargs, "reward_win"), + (float)unpack(kwargs, "reward_self_kill"), + (float)unpack(kwargs, "reward_enemy_death"), + (float)unpack(kwargs, "reward_enemy_kill"), + 0.0f, // teammate death punishment + 0.0f, // teammate kill punishment + (float)unpack(kwargs, "reward_death"), + (float)unpack(kwargs, "reward_energy_emptied"), + (float)unpack(kwargs, "reward_weapon_pickup"), + (float)unpack(kwargs, "reward_shield_break"), + (float)unpack(kwargs, "reward_shot_hit_coef"), + (float)unpack(kwargs, "reward_explosion_hit_coef") + ); + return 0; +} + +#define _LOG_BUF_SIZE 128 + +char *droneLog(char *buf, const uint8_t droneIdx, const char *name) { + snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s", droneIdx, name); + return buf; +} + +char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, const char *name) { + snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s_%s", droneIdx, weaponNames[weaponIdx], name); + return buf; +} + +static int my_log(PyObject *dict, Log *log) { + assign_to_dict(dict, "episode_length", log->length); + assign_to_dict(dict, "ties", log->ties); + + assign_to_dict(dict, "perf", log->stats[0].wins); + assign_to_dict(dict, "score", log->stats[0].wins); + + char buf[_LOG_BUF_SIZE] = {0}; + for (uint8_t i = 0; i < MAX_DRONES; i++) { + assign_to_dict(dict, droneLog(buf, i, "returns"), log->stats[i].returns); + assign_to_dict(dict, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); + assign_to_dict(dict, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); + assign_to_dict(dict, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); + assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); + assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); + assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); + assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); + assign_to_dict(dict, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); + assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); + assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); + assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); + assign_to_dict(dict, droneLog(buf, i, "wins"), log->stats[i].wins); + + // useful for debugging weapon balance, but really slows down + // sweeps due to adding a ton of extra logging data + // + // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { + // assign_to_dict(dict, 
weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); + // } + + assign_to_dict(dict, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); + assign_to_dict(dict, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); + assign_to_dict(dict, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); + assign_to_dict(dict, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); + assign_to_dict(dict, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); + assign_to_dict(dict, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); + } + + return 0; +} diff --git a/ocean/impulse_wars/helpers.h b/ocean/impulse_wars/helpers.h index 1692d6b9bd..7fb6a8305a 100644 --- a/ocean/impulse_wars/helpers.h +++ b/ocean/impulse_wars/helpers.h @@ -8,7 +8,7 @@ #include "box2d/box2d.h" -#include "include/cc_array.h" +#include "cc_array.h" #ifndef NDEBUG #define ON_ERROR __builtin_trap() @@ -121,7 +121,7 @@ #define fastFree(ptr) free(ptr) #define fastFreeFn free #else -#include "include/dlmalloc.h" +#include "dlmalloc.h" #define fastMalloc(size) dlmalloc(size) #define fastMallocFn dlmalloc #define fastCalloc(nmemb, size) dlcalloc(nmemb, size) diff --git a/ocean/impulse_wars/impulse_wars.py b/ocean/impulse_wars/impulse_wars.py new file mode 100644 index 0000000000..6fc2f5d27e --- /dev/null +++ b/ocean/impulse_wars/impulse_wars.py @@ -0,0 +1,181 @@ +from types import SimpleNamespace + +import gymnasium +import numpy as np + +import pufferlib +from pufferlib.ocean.impulse_wars import binding + + +discMoveToContMove = np.array([ + [1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0, 0.707107, 0.0], + [0.0, 0.707107, 1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0], +], dtype=np.float32) +discAimToContAim = np.array([ + [1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0, 0.382683, 0.707107, 0.92388, 0.0], + [0.0, 0.382683, 0.707107, 0.92388, 1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0], +], dtype=np.float32) + + +class ImpulseWars(pufferlib.PufferEnv): + def __init__( + self, + num_envs: int = 1, + num_drones: int = 2, + num_agents: int = 1, + enable_teams: bool = False, + sitting_duck: bool = False, + continuous: bool = False, + is_training: bool = True, + human_control: bool = False, + reward_win: float = 2.0, + reward_self_kill: float = -1.0, + reward_enemy_death: float = 1.0, + reward_enemy_kill: float = 1.0, + reward_death: float = -0.25, + reward_energy_emptied: float = -0.75, + reward_weapon_pickup: float = 0.5, + reward_shield_break: float = 0.5, + reward_shot_hit_coef: float = 0.005, + reward_explosion_hit_coef: float = 0.005, + seed: int = 0, + render: bool = False, + report_interval: int = 64, + buf = None, + ): + self.obsInfo = SimpleNamespace(**binding.get_consts(num_drones)) + + if num_envs <= 0: + raise ValueError("num_envs must be greater than 0") + if num_drones > 
self.obsInfo.maxDrones or num_drones <= 0:
+            raise ValueError(f"num_drones must be greater than 0 and less than or equal to {self.obsInfo.maxDrones}")
+        if num_agents > num_drones or num_agents <= 0:
+            raise ValueError("num_agents must be greater than 0 and less than or equal to num_drones")
+        if enable_teams and (num_drones % 2 != 0 or num_drones <= 2):
+            raise ValueError("enable_teams is only supported for even numbers of drones greater than 2")
+
+        self.numDrones = num_drones
+        self.continuous = continuous
+
+        self.num_agents = num_agents * num_envs
+        self.tick = 0
+
+        # map observations are bit packed to save space, and scalar
+        # observations need to be floats
+        self.single_observation_space = gymnasium.spaces.Box(
+            low=0, high=255, shape=(self.obsInfo.obsBytes,), dtype=np.uint8
+        )
+
+        if self.continuous:
+            # action space is actually bounded by (-1, 1) but pufferlib
+            # will check that actions are within the bounds of the action
+            # space before actions get to the env, and we ensure the actions
+            # are bounded there; so set bounds to (-inf, inf) here so
+            # action bounds checks pass
+            self.single_action_space = gymnasium.spaces.Box(
+                low=float("-inf"), high=float("inf"), shape=(self.obsInfo.contActionsSize,), dtype=np.float32
+            )
+        else:
+            self.single_action_space = gymnasium.spaces.MultiDiscrete(
+                [
+                    9, # move, noop + 8 directions
+                    17, # aim, noop + 16 directions
+                    2, # shoot or not
+                    2, # brake or not
+                    2, # burst
+                ]
+            )
+
+        self.report_interval = report_interval
+        self.render_mode = "human" if render else None
+
+        super().__init__(buf)
+        if not self.continuous:
+            self.actions = np.zeros((self.num_agents, self.obsInfo.contActionsSize), dtype=np.float32)
+
+        self.c_envs = binding.vec_init(
+            self.observations,
+            self.actions,
+            self.rewards,
+            self.terminals,
+            self.truncations,
+            num_envs,
+            seed,
+            num_drones=num_drones,
+            num_agents=num_agents,
+            map_idx=-1,
+            enable_teams=enable_teams,
+            sitting_duck=sitting_duck,
+            is_training=is_training,
+            continuous=continuous,
+            reward_win=reward_win,
+            reward_self_kill=reward_self_kill,
+            reward_enemy_death=reward_enemy_death,
+            reward_enemy_kill=reward_enemy_kill,
+            reward_death=reward_death,
+            reward_energy_emptied=reward_energy_emptied,
+            reward_weapon_pickup=reward_weapon_pickup,
+            reward_shield_break=reward_shield_break,
+            reward_shot_hit_coef=reward_shot_hit_coef,
+            reward_explosion_hit_coef=reward_explosion_hit_coef,
+        )
+
+        binding.shared(self.c_envs)
+
+    def reset(self, seed=None):
+        self.tick = 0
+        if seed is None:
+            binding.vec_reset(self.c_envs, 0)
+        else:
+            binding.vec_reset(self.c_envs, seed)
+        return self.observations, []
+
+    def step(self, actions):
+        if self.continuous:
+            self.actions[:] = actions
+        else:
+            # map discrete move/aim bins onto the unit circle directions
+            # defined in the tables at the top of this file
+            contMove = discMoveToContMove[:, actions[:, 0]].T
+            contAim = discAimToContAim[:, actions[:, 1]].T
+            contRest = actions[:, 2:].astype(np.float32)
+            self.actions[:] = np.concatenate([contMove, contAim, contRest], axis=1)
+
+        self.tick += 1
+        binding.vec_step(self.c_envs)
+
+        infos = []
+        if self.tick % self.report_interval == 0:
+            infos.append(binding.vec_log(self.c_envs))
+
+        return self.observations, self.rewards, self.terminals, self.truncations, infos
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+
+def testPerf(timeout, actionCache, numEnvs):
+    # the random actions below are continuous floats, so the env must use
+    # the continuous action space
+    env = ImpulseWars(numEnvs, continuous=True)
+
+    import time
+
+    np.random.seed(int(time.time()))
+    actions = np.random.uniform(-1, 1, (actionCache, env.num_agents, 7))
+
+    tick = 0
+    start = time.time()
+    while time.time() - start < timeout:
+        action = actions[tick % actionCache]
+        env.step(action)
+        tick += 1
+
+    sps = numEnvs * (tick / (time.time() - start))
+    print(f"SPS: {sps:,}")
+    print(f"Steps: {numEnvs * tick}")
+
+    env.close()
+
+
+if __name__ == "__main__":
+    testPerf(timeout=5, actionCache=1024, numEnvs=1)
diff --git a/ocean/impulse_wars/pyproject.toml b/ocean/impulse_wars/pyproject.toml
new file mode 100644
index 0000000000..df67b2bd17
--- /dev/null
+++ b/ocean/impulse_wars/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["scikit-build-core>=0.10", "autopxd2>=2.5.0", "cython>=3.0.11"]
+build-backend = "scikit_build_core.build"
+
+[project]
+name = "binding"
+version = "1.0.0"
+requires-python = ">=3.11"
+
+[tool.scikit-build]
+minimum-version = "build-system.requires"
+cmake.build-type = "Release"
+build.verbose = true
+logging.level = "INFO"
+
+[tool.scikit-build.cmake.define]
+BUILD_PYTHON_MODULE = true
+CMAKE_C_COMPILER = "clang-20"
+
+[tool.ruff]
+line-length = 110
+
+[tool.ruff.lint]
+# skip "Module level import not at top of file"
+ignore = ["E402"]
diff --git a/ocean/impulse_wars/types.h b/ocean/impulse_wars/types.h
index 6df5014e95..9bad059336 100644
--- a/ocean/impulse_wars/types.h
+++ b/ocean/impulse_wars/types.h
@@ -6,7 +6,7 @@
 #include "raylib.h"
 #include "rlights.h"
 
-#include "include/cc_array.h"
+#include "cc_array.h"
 
 #include "settings.h"
 
diff --git a/vendor/cc_array.h b/vendor/cc_array.h
new file mode 100644
index 0000000000..311f99122b
--- /dev/null
+++ b/vendor/cc_array.h
@@ -0,0 +1,1410 @@
+/*
+ * Collections-C
+ * Copyright (C) 2013-2015 Srđan Panić
+ *
+ * This file is part of Collections-C.
+ *
+ * Collections-C is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Collections-C is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Collections-C. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CC_ARRAY_H
+#define CC_ARRAY_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "cc_common.h"
+
+/**
+ * A dynamic array that expands automatically as elements are
+ * added. The array supports amortized constant time insertion
+ * and removal of elements at the end of the array, as well as
+ * constant time access.
+ */
+typedef struct cc_array_s CC_Array;
+
+/**
+ * Array configuration structure. Used to initialize a new Array
+ * with specific values.
+ */
+typedef struct cc_array_conf_s {
+    /**
+     * The initial capacity of the array */
+    size_t capacity;
+
+    /**
+     * The rate at which the buffer expands (capacity * exp_factor). */
+    float exp_factor;
+
+    /**
+     * Memory allocators used to allocate the Array structure and the
+     * underlying data buffers. */
+    void *(*mem_alloc)(size_t size);
+    void *(*mem_calloc)(size_t blocks, size_t size);
+    void (*mem_free)(void *block);
+} CC_ArrayConf;
+
+/**
+ * Array iterator structure. Used to iterate over the elements of
+ * the array in an ascending order. The iterator also supports
+ * operations for safely adding and removing elements during
+ * iteration.
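+ *
+ * A minimal usage sketch (assuming an already populated CC_Array *ar):
+ *
+ * @code
+ * CC_ArrayIter iter;
+ * void *element;
+ * cc_array_iter_init(&iter, ar);
+ * while (cc_array_iter_next(&iter, &element) != CC_ITER_END) {
+ *     // inspect element; cc_array_iter_remove(&iter, NULL) may safely
+ *     // remove the element that was just returned
+ * }
+ * @endcode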
+ */ +typedef struct cc_array_iter_s { + /** + * The array associated with this iterator */ + CC_Array *ar; + + /** + * The current position of the iterator.*/ + size_t index; + + /** + * Set to true if the last returned element was removed. */ + bool last_removed; +} CC_ArrayIter; + +/** + * Array zip iterator structure. Used to iterate over the elements of two + * arrays in lockstep in an ascending order until one of the Arrays is + * exhausted. The iterator also supports operations for safely adding + * and removing elements during iteration. + */ +typedef struct array_zip_iter_s { + CC_Array *ar1; + CC_Array *ar2; + size_t index; + bool last_removed; +} CC_ArrayZipIter; + +enum cc_stat cc_array_new(CC_Array **out); +enum cc_stat cc_array_new_conf(CC_ArrayConf const *const conf, CC_Array **out); +void cc_array_conf_init(CC_ArrayConf *conf); +size_t cc_array_struct_size(); + +void cc_array_destroy(CC_Array *ar); +void cc_array_destroy_cb(CC_Array *ar, void (*cb)(void *)); + +enum cc_stat cc_array_add(CC_Array *ar, void *element); +enum cc_stat cc_array_add_at(CC_Array *ar, void *element, size_t index); +enum cc_stat cc_array_replace_at(CC_Array *ar, void *element, size_t index, void **out); +enum cc_stat cc_array_swap_at(CC_Array *ar, size_t index1, size_t index2); + +enum cc_stat cc_array_remove(CC_Array *ar, void *element, void **out); +enum cc_stat cc_array_remove_fast(CC_Array *ar, void *element, void **out); +enum cc_stat cc_array_remove_at(CC_Array *ar, size_t index, void **out); +enum cc_stat cc_array_remove_fast_at(CC_Array *ar, size_t index, void **out); +enum cc_stat cc_array_remove_last(CC_Array *ar, void **out); +void cc_array_remove_all(CC_Array *ar); +void cc_array_remove_all_free(CC_Array *ar); + +enum cc_stat cc_array_get_at(const CC_Array *ar, size_t index, void **out); +enum cc_stat cc_array_get_last(const CC_Array *ar, void **out); + +enum cc_stat cc_array_subarray(CC_Array *ar, size_t from, size_t to, CC_Array **out); +enum cc_stat cc_array_copy_shallow(CC_Array *ar, CC_Array **out); +enum cc_stat cc_array_copy_deep(CC_Array *ar, void *(*cp)(void *), CC_Array **out); + +void cc_array_reverse(CC_Array *ar); +enum cc_stat cc_array_trim_capacity(CC_Array *ar); + +size_t cc_array_contains(const CC_Array *ar, void *element); +size_t cc_array_contains_value(const CC_Array *ar, void *element, int (*cmp)(const void *, const void *)); +size_t cc_array_size(const CC_Array *ar); +size_t cc_array_capacity(const CC_Array *ar); + +enum cc_stat cc_array_index_of(const CC_Array *ar, void *element, size_t *index); +void cc_array_sort(CC_Array *ar, int (*cmp)(const void *, const void *)); + +void cc_array_map(CC_Array *ar, void (*fn)(void *)); +void cc_array_reduce(CC_Array *ar, void (*fn)(void *, void *, void *), void *result); + +enum cc_stat cc_array_filter_mut(CC_Array *ar, bool (*predicate)(const void *)); +enum cc_stat cc_array_filter(CC_Array *ar, bool (*predicate)(const void *), CC_Array **out); + +void cc_array_iter_init(CC_ArrayIter *iter, CC_Array *ar); +enum cc_stat cc_array_iter_next(CC_ArrayIter *iter, void **out); +enum cc_stat cc_array_iter_remove(CC_ArrayIter *iter, void **out); +enum cc_stat cc_array_iter_remove_fast(CC_ArrayIter *iter, void **out); +enum cc_stat cc_array_iter_add(CC_ArrayIter *iter, void *element); +enum cc_stat cc_array_iter_replace(CC_ArrayIter *iter, void *element, void **out); +size_t cc_array_iter_index(CC_ArrayIter *iter); + +void cc_array_zip_iter_init(CC_ArrayZipIter *iter, CC_Array *a1, CC_Array *a2); +enum cc_stat 
cc_array_zip_iter_next(CC_ArrayZipIter *iter, void **out1, void **out2); +enum cc_stat cc_array_zip_iter_add(CC_ArrayZipIter *iter, void *e1, void *e2); +enum cc_stat cc_array_zip_iter_remove(CC_ArrayZipIter *iter, void **out1, void **out2); +enum cc_stat cc_array_zip_iter_replace(CC_ArrayZipIter *iter, void *e1, void *e2, void **out1, void **out2); +size_t cc_array_zip_iter_index(CC_ArrayZipIter *iter); + +const void *const *cc_array_get_buffer(CC_Array *ar); + +#define CC_ARRAY_FOREACH(val, array, body) \ + { \ + CC_ArrayIter cc_array_iter_53d46d2a04458e7b; \ + cc_array_iter_init(&cc_array_iter_53d46d2a04458e7b, array); \ + void *val; \ + while (cc_array_iter_next(&cc_array_iter_53d46d2a04458e7b, &val) != CC_ITER_END) \ + body \ + } + +#define CC_ARRAY_FOREACH_ZIP(val1, val2, array1, array2, body) \ + { \ + CC_ArrayZipIter cc_array_zip_iter_ea08d3e52f25883b3; \ + cc_array_zip_iter_init(&cc_array_zip_iter_ea08d3e52f25883b3, array1, array2); \ + void *val1; \ + void *val2; \ + while (cc_array_zip_iter_next(&cc_array_zip_iter_ea08d3e52f25883b3, &val1, &val2) != CC_ITER_END) \ + body \ + } + +#define DEFAULT_CAPACITY 8 +#define DEFAULT_EXPANSION_FACTOR 2 + +struct cc_array_s { + size_t size; + size_t capacity; + float exp_factor; + void **buffer; + + void *(*mem_alloc)(size_t size); + void *(*mem_calloc)(size_t blocks, size_t size); + void (*mem_free)(void *block); +}; + +static enum cc_stat expand_array_capacity(CC_Array *ar); + +/** + * Creates a new empty array and returns a status code. + * + * @param[out] out pointer to where the newly created CC_Array is to be stored + * + * @return CC_OK if the creation was successful, or CC_ERR_ALLOC if the + * memory allocation for the new CC_Array structure failed. + */ +enum cc_stat cc_array_new(CC_Array **out) { + CC_ArrayConf c; + cc_array_conf_init(&c); + return cc_array_new_conf(&c, out); +} + +/** + * Creates a new empty CC_Array based on the specified CC_ArrayConf struct and + * returns a status code. + * + * The CC_Array is allocated using the allocators specified in the CC_ArrayConf + * struct. The allocation may fail if underlying allocator fails. It may also + * fail if the values of exp_factor and capacity in the CC_ArrayConf do not meet + * the following condition: exp_factor < (CC_MAX_ELEMENTS / capacity). + * + * @param[in] conf array configuration structure + * @param[out] out pointer to where the newly created CC_Array is to be stored + * + * @return CC_OK if the creation was successful, CC_ERR_INVALID_CAPACITY if + * the above mentioned condition is not met, or CC_ERR_ALLOC if the memory + * allocation for the new CC_Array structure failed. + */ +enum cc_stat cc_array_new_conf(CC_ArrayConf const *const conf, CC_Array **out) { + float ex; + + /* The expansion factor must be greater than one for the + * array to grow */ + if (conf->exp_factor <= 1) { + ex = DEFAULT_EXPANSION_FACTOR; + } else { + ex = conf->exp_factor; + } + + /* Needed to avoid an integer overflow on the first resize and + * to easily check for any future overflows. 
*/ + if (!conf->capacity || ex >= CC_MAX_ELEMENTS / conf->capacity) { + return CC_ERR_INVALID_CAPACITY; + } + + CC_Array *ar = (CC_Array *)conf->mem_calloc(1, sizeof(CC_Array)); + + if (!ar) { + return CC_ERR_ALLOC; + } + + void **buff = (void **)conf->mem_alloc(conf->capacity * sizeof(void *)); + + if (!buff) { + conf->mem_free(ar); + return CC_ERR_ALLOC; + } + + ar->buffer = buff; + ar->exp_factor = ex; + ar->capacity = conf->capacity; + ar->mem_alloc = conf->mem_alloc; + ar->mem_calloc = conf->mem_calloc; + ar->mem_free = conf->mem_free; + + *out = ar; + return CC_OK; +} + +/** + * Initializes the fields of the CC_ArrayConf struct to default values. + * + * @param[in, out] conf CC_ArrayConf structure that is being initialized + */ +void cc_array_conf_init(CC_ArrayConf *conf) { + conf->exp_factor = DEFAULT_EXPANSION_FACTOR; + conf->capacity = DEFAULT_CAPACITY; + conf->mem_alloc = malloc; + conf->mem_calloc = calloc; + conf->mem_free = free; +} + +/** + * Destroys the CC_Array structure, but leaves the data it used to hold intact. + * + * @param[in] ar the array that is to be destroyed + */ +void cc_array_destroy(CC_Array *ar) { + ar->mem_free(ar->buffer); + ar->mem_free(ar); +} + +/** + * Destroys the CC_Array structure along with all the data it holds. + * + * @note + * This function should not be called on a array that has some of its elements + * allocated on the stack. + * + * @param[in] ar the array that is being destroyed + */ +void cc_array_destroy_cb(CC_Array *ar, void (*cb)(void *)) { + size_t i; + for (i = 0; i < ar->size; i++) { + cb(ar->buffer[i]); + } + + cc_array_destroy(ar); +} + +/** + * Adds a new element to the CC_Array. The element is appended to the array making + * it the last element (the one with the highest index) of the CC_Array. + * + * @param[in] ar the array to which the element is being added + * @param[in] element the element that is being added + * + * @return CC_OK if the element was successfully added, CC_ERR_ALLOC if the + * memory allocation for the new element failed, or CC_ERR_MAX_CAPACITY if the + * array is already at maximum capacity. + */ +enum cc_stat cc_array_add(CC_Array *ar, void *element) { + if (ar->size >= ar->capacity) { + enum cc_stat status = expand_array_capacity(ar); + if (status != CC_OK) { + return status; + } + } + + ar->buffer[ar->size] = element; + ar->size++; + + return CC_OK; +} + +/** + * Adds a new element to the array at a specified position by shifting all + * subsequent elements by one. The specified index must be within the bounds + * of the array. This function may also fail if the memory allocation for + * the new element was unsuccessful. + * + * @param[in] ar the array to which the element is being added + * @param[in] element the element that is being added + * @param[in] index the position in the array at which the element is being + * added + * + * @return CC_OK if the element was successfully added, CC_ERR_OUT_OF_RANGE if + * the specified index was not in range, CC_ERR_ALLOC if the memory + * allocation for the new element failed, or CC_ERR_MAX_CAPACITY if the + * array is already at maximum capacity. 
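+ *
+ * A short sketch of the status-checking convention (illustrative, not from
+ * the original source):
+ * @code
+ * if (cc_array_add_at(ar, element, 2) != CC_OK) {
+ *     // index out of range, allocation failure, or max capacity reached
+ * }
+ * @endcode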
+ */ +enum cc_stat cc_array_add_at(CC_Array *ar, void *element, size_t index) { + if (index == ar->size) { + return cc_array_add(ar, element); + } + + if ((ar->size == 0 && index != 0) || index > (ar->size - 1)) { + return CC_ERR_OUT_OF_RANGE; + } + + if (ar->size >= ar->capacity) { + enum cc_stat status = expand_array_capacity(ar); + if (status != CC_OK) { + return status; + } + } + + size_t shift = (ar->size - index) * sizeof(void *); + + memmove(&(ar->buffer[index + 1]), + &(ar->buffer[index]), + shift); + + ar->buffer[index] = element; + ar->size++; + + return CC_OK; +} + +/** + * Replaces an array element at the specified index and optionally sets the out + * parameter to the value of the replaced element. The specified index must be + * within the bounds of the CC_Array. + * + * @param[in] ar array whose element is being replaced + * @param[in] element replacement element + * @param[in] index index at which the replacement element should be inserted + * @param[out] out pointer to where the replaced element is stored, or NULL if + * it is to be ignored + * + * @return CC_OK if the element was successfully replaced, or CC_ERR_OUT_OF_RANGE + * if the index was out of range. + */ +enum cc_stat cc_array_replace_at(CC_Array *ar, void *element, size_t index, void **out) { + if (index >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + if (out) { + *out = ar->buffer[index]; + } + + ar->buffer[index] = element; + + return CC_OK; +} + +enum cc_stat cc_array_swap_at(CC_Array *ar, size_t index1, size_t index2) { + void *tmp; + + if (index1 >= ar->size || index2 >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + tmp = ar->buffer[index1]; + + ar->buffer[index1] = ar->buffer[index2]; + ar->buffer[index2] = tmp; + return CC_OK; +} + +/** + * Removes the specified element from the CC_Array if such element exists and + * optionally sets the out parameter to the value of the removed element. + * + * @param[in] ar array from which the element is being removed + * @param[in] element element being removed + * @param[out] out pointer to where the removed value is stored, or NULL + * if it is to be ignored + * + * @return CC_OK if the element was successfully removed, or + * CC_ERR_VALUE_NOT_FOUND if the element was not found. + */ +enum cc_stat cc_array_remove(CC_Array *ar, void *element, void **out) { + size_t index; + enum cc_stat status = cc_array_index_of(ar, element, &index); + + if (status == CC_ERR_OUT_OF_RANGE) { + return CC_ERR_VALUE_NOT_FOUND; + } + + if (index != ar->size - 1) { + size_t block_size = (ar->size - 1 - index) * sizeof(void *); + + memmove(&(ar->buffer[index]), + &(ar->buffer[index + 1]), + block_size); + } + ar->size--; + + if (out) { + *out = element; + } + + return CC_OK; +} + +/** + * Removes a CC_Array element without preserving order and optionally sets the + * out parameter to the value of the removed element. The last element of the + * array is moved to the index of the element being removed, and the last + * element is removed. + * + * @param[in] ar the array whose last element is being removed + * @param[out] out pointer to where the removed value is stored, or NULL if it is + * to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the CC_Array is already empty. 
+ */ +enum cc_stat cc_array_remove_fast(CC_Array *ar, void *element, void **out) { + size_t index = 0; + const enum cc_stat status = cc_array_index_of(ar, element, &index); + if (status != CC_OK) { + return status; + } + + if (out) { + *out = ar->buffer[index]; + } + + ar->buffer[index] = ar->buffer[ar->size - 1]; + ar->size--; + + return CC_OK; +} + +/** + * Removes an CC_Array element from the specified index and optionally sets the + * out parameter to the value of the removed element. The index must be within + * the bounds of the array. + * + * @param[in] ar the array from which the element is being removed + * @param[in] index the index of the element being removed. + * @param[out] out pointer to where the removed value is stored, + * or NULL if it is to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the index was out of range. + */ +enum cc_stat cc_array_remove_at(CC_Array *ar, size_t index, void **out) { + if (index >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + if (out) { + *out = ar->buffer[index]; + } + + if (index != ar->size - 1) { + size_t block_size = (ar->size - 1 - index) * sizeof(void *); + + memmove(&(ar->buffer[index]), + &(ar->buffer[index + 1]), + block_size); + } + ar->size--; + + return CC_OK; +} + +/** + * Removes a CC_Array element from the specified index and optionally sets the + * out parameter to the value of the removed element without preserving ordering. + * The last element of the array is moved to the index of the element being removed, + * and the last element is removed. The index must be within the bounds of the array. + * + * @param[in] ar the array from which the element is being removed + * @param[in] index the index of the element being removed. + * @param[out] out pointer to where the removed value is stored, + * or NULL if it is to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the index was out of range. + */ +enum cc_stat cc_array_remove_fast_at(CC_Array *ar, size_t index, void **out) { + if (index >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + if (out) { + *out = ar->buffer[index]; + } + + ar->buffer[index] = ar->buffer[ar->size - 1]; + ar->size--; + + return CC_OK; +} + +/** + * Removes an CC_Array element from the end of the array and optionally sets the + * out parameter to the value of the removed element. + * + * @param[in] ar the array whose last element is being removed + * @param[out] out pointer to where the removed value is stored, or NULL if it is + * to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the CC_Array is already empty. + */ +enum cc_stat cc_array_remove_last(CC_Array *ar, void **out) { + return cc_array_remove_at(ar, ar->size - 1, out); +} + +/** + * Removes all elements from the specified array. This function does not shrink + * the array capacity. + * + * @param[in] ar array from which all elements are to be removed + */ +void cc_array_remove_all(CC_Array *ar) { + ar->size = 0; +} + +/** + * Removes and frees all elements from the specified array. This function does + * not shrink the array capacity. + * + * @param[in] ar array from which all elements are to be removed + */ +void cc_array_remove_all_free(CC_Array *ar) { + size_t i; + for (i = 0; i < ar->size; i++) { + free(ar->buffer[i]); + } + + cc_array_remove_all(ar); +} + +/** + * Gets an CC_Array element from the specified index and sets the out parameter to + * its value. 
The specified index must be within the bounds of the array.
+ *
+ * @param[in] ar the array from which the element is being retrieved
+ * @param[in] index the index of the array element
+ * @param[out] out pointer to where the element is stored
+ *
+ * @return CC_OK if the element was found, or CC_ERR_OUT_OF_RANGE if the index
+ * was out of range.
+ */
+enum cc_stat cc_array_get_at(const CC_Array *ar, size_t index, void **out) {
+    if (index >= ar->size) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    *out = ar->buffer[index];
+    return CC_OK;
+}
+
+/**
+ * Gets the last element of the array or the element at the highest index
+ * and sets the out parameter to its value.
+ *
+ * @param[in] ar the array whose last element is being returned
+ * @param[out] out pointer to where the element is stored
+ *
+ * @return CC_OK if the element was found, or CC_ERR_VALUE_NOT_FOUND if the
+ * CC_Array is empty.
+ */
+enum cc_stat cc_array_get_last(const CC_Array *ar, void **out) {
+    if (ar->size == 0) {
+        return CC_ERR_VALUE_NOT_FOUND;
+    }
+
+    return cc_array_get_at(ar, ar->size - 1, out);
+}
+
+/**
+ * Returns the underlying array buffer.
+ *
+ * @note Any direct modification of the buffer may invalidate the CC_Array.
+ *
+ * @param[in] ar array whose underlying buffer is being returned
+ *
+ * @return array's internal buffer.
+ */
+const void *const *cc_array_get_buffer(CC_Array *ar) {
+    return (const void *const *)ar->buffer;
+}
+
+/**
+ * Gets the index of the specified element. The returned index is the index
+ * of the first occurrence of the element starting from the beginning of the
+ * CC_Array.
+ *
+ * @param[in] ar array being searched
+ * @param[in] element the element whose index is being looked up
+ * @param[out] index pointer to where the index is stored
+ *
+ * @return CC_OK if the index was found, or CC_ERR_OUT_OF_RANGE if not.
+ */
+enum cc_stat cc_array_index_of(const CC_Array *ar, void *element, size_t *index) {
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        if (ar->buffer[i] == element) {
+            *index = i;
+            return CC_OK;
+        }
+    }
+    return CC_ERR_OUT_OF_RANGE;
+}
+
+/**
+ * Creates a subarray of the specified CC_Array, ranging from b
+ * index (inclusive) to e index (inclusive). The range indices
+ * must be within the bounds of the CC_Array, while the e index
+ * must be greater or equal to the b index.
+ *
+ * @note The new CC_Array is allocated using the original CC_Array's allocators
+ * and it also inherits the configuration of the original CC_Array.
+ *
+ * @param[in] ar array from which the subarray is being created
+ * @param[in] b the beginning index (inclusive) of the subarray that must be
+ *              within the bounds of the array and must not exceed the
+ *              end index
+ * @param[in] e the end index (inclusive) of the subarray that must be within
+ *              the bounds of the array and must be greater or equal to the
+ *              beginning index
+ * @param[out] out pointer to where the new sublist is stored
+ *
+ * @return CC_OK if the subarray was successfully created, CC_ERR_INVALID_RANGE
+ * if the specified index range is invalid, or CC_ERR_ALLOC if the memory allocation
+ * for the new subarray failed.
+ 
+ */ +enum cc_stat cc_array_subarray(CC_Array *ar, size_t b, size_t e, CC_Array **out) { + if (b > e || e >= ar->size) { + return CC_ERR_INVALID_RANGE; + } + + CC_Array *sub_ar = (CC_Array *)ar->mem_calloc(1, sizeof(CC_Array)); + + if (!sub_ar) { + return CC_ERR_ALLOC; + } + + /* Try to allocate the buffer */ + if (!(sub_ar->buffer = (void **)ar->mem_alloc(ar->capacity * sizeof(void *)))) { + ar->mem_free(sub_ar); + return CC_ERR_ALLOC; + } + + sub_ar->mem_alloc = ar->mem_alloc; + sub_ar->mem_calloc = ar->mem_calloc; + sub_ar->mem_free = ar->mem_free; + sub_ar->size = e - b + 1; + sub_ar->capacity = sub_ar->size; + + memcpy(sub_ar->buffer, + &(ar->buffer[b]), + sub_ar->size * sizeof(void *)); + + *out = sub_ar; + return CC_OK; +} + +/** + * Creates a shallow copy of the specified CC_Array. A shallow copy is a copy of + * the CC_Array structure, but not the elements it holds. + * + * @note The new CC_Array is allocated using the original CC_Array's allocators + * and it also inherits the configuration of the original array. + * + * @param[in] ar the array to be copied + * @param[out] out pointer to where the newly created copy is stored + * + * @return CC_OK if the copy was successfully created, or CC_ERR_ALLOC if the + * memory allocation for the copy failed. + */ +enum cc_stat cc_array_copy_shallow(CC_Array *ar, CC_Array **out) { + CC_Array *copy = (CC_Array *)ar->mem_alloc(sizeof(CC_Array)); + + if (!copy) { + return CC_ERR_ALLOC; + } + + if (!(copy->buffer = (void **)ar->mem_calloc(ar->capacity, sizeof(void *)))) { + ar->mem_free(copy); + return CC_ERR_ALLOC; + } + copy->exp_factor = ar->exp_factor; + copy->size = ar->size; + copy->capacity = ar->capacity; + copy->mem_alloc = ar->mem_alloc; + copy->mem_calloc = ar->mem_calloc; + copy->mem_free = ar->mem_free; + + memcpy(copy->buffer, + ar->buffer, + copy->size * sizeof(void *)); + + *out = copy; + return CC_OK; +} + +/** + * Creates a deep copy of the specified CC_Array. A deep copy is a copy of + * both the CC_Array structure and the data it holds. + * + * @note The new CC_Array is allocated using the original CC_Array's allocators + * and it also inherits the configuration of the original CC_Array. + * + * @param[in] ar array to be copied + * @param[in] cp the copy function that should return a pointer to the copy of + * the data + * @param[out] out pointer to where the newly created copy is stored + * + * @return CC_OK if the copy was successfully created, or CC_ERR_ALLOC if the + * memory allocation for the copy failed. + */ +enum cc_stat cc_array_copy_deep(CC_Array *ar, void *(*cp)(void *), CC_Array **out) { + CC_Array *copy = (CC_Array *)ar->mem_alloc(sizeof(CC_Array)); + + if (!copy) { + return CC_ERR_ALLOC; + } + + if (!(copy->buffer = (void **)ar->mem_calloc(ar->capacity, sizeof(void *)))) { + ar->mem_free(copy); + return CC_ERR_ALLOC; + } + + copy->exp_factor = ar->exp_factor; + copy->size = ar->size; + copy->capacity = ar->capacity; + copy->mem_alloc = ar->mem_alloc; + copy->mem_calloc = ar->mem_calloc; + copy->mem_free = ar->mem_free; + + size_t i; + for (i = 0; i < copy->size; i++) { + copy->buffer[i] = cp(ar->buffer[i]); + } + + *out = copy; + + return CC_OK; +} + +/** + * Filters the CC_Array by modifying it. It removes all elements that don't + * return true on pred(element). 
+ *
+ * @param[in] ar array that is to be filtered
+ * @param[in] pred predicate function which returns true if the element should
+ *                 be kept in the CC_Array
+ *
+ * @return CC_OK if the CC_Array was filtered successfully, or CC_ERR_OUT_OF_RANGE
+ * if the CC_Array is empty.
+ */
+enum cc_stat cc_array_filter_mut(CC_Array *ar, bool (*pred)(const void *)) {
+    if (ar->size == 0) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    size_t rm = 0;
+    size_t keep = 0;
+
+    /* Look for clusters of non-matching elements before moving
+     * in order to minimize the number of memmoves */
+    for (size_t i = ar->size - 1; i != ((size_t)-1); i--) {
+        if (!pred(ar->buffer[i])) {
+            rm++;
+            continue;
+        }
+        if (rm > 0) {
+            if (keep > 0) {
+                size_t block_size = keep * sizeof(void *);
+                memmove(&(ar->buffer[i + 1]),
+                        &(ar->buffer[i + 1 + rm]),
+                        block_size);
+            }
+            ar->size -= rm;
+            rm = 0;
+        }
+        keep++;
+    }
+    /* Remove any remaining elements */
+    if (rm > 0) {
+        size_t block_size = keep * sizeof(void *);
+        memmove(&(ar->buffer[0]),
+                &(ar->buffer[rm]),
+                block_size);
+
+        ar->size -= rm;
+    }
+    return CC_OK;
+}
+
+/**
+ * Filters the CC_Array by creating a new CC_Array that contains all of the
+ * elements from the original CC_Array that return true on pred(element),
+ * without modifying the original CC_Array.
+ *
+ * @param[in] ar array that is to be filtered
+ * @param[in] pred predicate function which returns true if the element should
+ *                 be kept in the filtered array
+ * @param[out] out pointer to where the new filtered CC_Array is to be stored
+ *
+ * @return CC_OK if the CC_Array was filtered successfully, CC_ERR_OUT_OF_RANGE
+ * if the CC_Array is empty, or CC_ERR_ALLOC if the memory allocation for the
+ * new CC_Array failed.
+ */
+enum cc_stat cc_array_filter(CC_Array *ar, bool (*pred)(const void *), CC_Array **out) {
+    if (ar->size == 0) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    CC_Array *filtered = (CC_Array *)ar->mem_alloc(sizeof(CC_Array));
+
+    if (!filtered) {
+        return CC_ERR_ALLOC;
+    }
+
+    if (!(filtered->buffer = (void **)ar->mem_calloc(ar->capacity, sizeof(void *)))) {
+        ar->mem_free(filtered);
+        return CC_ERR_ALLOC;
+    }
+
+    filtered->exp_factor = ar->exp_factor;
+    filtered->size = 0;
+    filtered->capacity = ar->capacity;
+    filtered->mem_alloc = ar->mem_alloc;
+    filtered->mem_calloc = ar->mem_calloc;
+    filtered->mem_free = ar->mem_free;
+
+    size_t f = 0;
+    for (size_t i = 0; i < ar->size; i++) {
+        if (pred(ar->buffer[i])) {
+            filtered->buffer[f++] = ar->buffer[i];
+            filtered->size++;
+        }
+    }
+    *out = filtered;
+
+    return CC_OK;
+}
+
+/**
+ * Reverses the order of elements in the specified array.
+ *
+ * @param[in] ar array that is being reversed
+ */
+void cc_array_reverse(CC_Array *ar) {
+    if (ar->size == 0) {
+        return;
+    }
+
+    size_t i;
+    size_t j;
+    for (i = 0, j = ar->size - 1; i < ar->size / 2; i++, j--) {
+        void *tmp = ar->buffer[i];
+        ar->buffer[i] = ar->buffer[j];
+        ar->buffer[j] = tmp;
+    }
+}
+
+/**
+ * Trims the array's capacity; that is, it shrinks the capacity to match
+ * the number of elements in the CC_Array. The capacity will never shrink
+ * below 1.
+ *
+ * @param[in] ar array whose capacity is being trimmed
+ *
+ * @return CC_OK if the capacity was trimmed successfully, or CC_ERR_ALLOC if
+ * the reallocation failed.
+ */
+enum cc_stat cc_array_trim_capacity(CC_Array *ar) {
+    if (ar->size == ar->capacity) {
+        return CC_OK;
+    }
+
+    /* The capacity is never trimmed below 1 */
+    size_t size = ar->size < 1 ? 1 : ar->size;
+
+    void **new_buff = (void **)ar->mem_calloc(size, sizeof(void *));
+
+    if (!new_buff) {
+        return CC_ERR_ALLOC;
+    }
+
+    memcpy(new_buff, ar->buffer, size * sizeof(void *));
+    ar->mem_free(ar->buffer);
+
+    ar->buffer = new_buff;
+    ar->capacity = size;
+
+    return CC_OK;
+}
+
+/**
+ * Returns the number of occurrences of the element within the specified CC_Array.
+ *
+ * @param[in] ar array that is being searched
+ * @param[in] element the element that is being searched for
+ *
+ * @return the number of occurrences of the element.
+ */
+size_t cc_array_contains(const CC_Array *ar, void *element) {
+    size_t o = 0;
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        if (ar->buffer[i] == element) {
+            o++;
+        }
+    }
+    return o;
+}
+
+/**
+ * Returns the number of occurrences of the value pointed to by e
+ * within the specified CC_Array.
+ *
+ * @param[in] ar array that is being searched
+ * @param[in] element the element that is being searched for
+ * @param[in] cmp comparator function which returns 0 if the values passed to it are equal
+ *
+ * @return the number of occurrences of the value.
+ */
+size_t cc_array_contains_value(const CC_Array *ar, void *element, int (*cmp)(const void *, const void *)) {
+    size_t o = 0;
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        if (cmp(element, ar->buffer[i]) == 0) {
+            o++;
+        }
+    }
+    return o;
+}
+
+/**
+ * Returns the size of the specified CC_Array. The size of the array is the
+ * number of elements contained within the CC_Array.
+ *
+ * @param[in] ar array whose size is being returned
+ *
+ * @return the number of elements within the CC_Array.
+ */
+size_t cc_array_size(const CC_Array *ar) {
+    return ar->size;
+}
+
+/**
+ * Returns the capacity of the specified CC_Array. The capacity of the CC_Array is
+ * the maximum number of elements a CC_Array can hold before it has to be resized.
+ *
+ * @param[in] ar array whose capacity is being returned
+ *
+ * @return the capacity of the CC_Array.
+ */
+size_t cc_array_capacity(const CC_Array *ar) {
+    return ar->capacity;
+}
+
+/**
+ * Sorts the specified array.
+ *
+ * @note
+ * Pointers passed to the comparator function will be pointers to the array
+ * elements, which are themselves of type (void*), i.e. (void**). So an extra
+ * step of dereferencing is required before the data can be used for
+ * comparison: e.g. my_type e = *(*((my_type**) ptr));.
+ *
+ * @code
+ * int mycmp(const void *e1, const void *e2) {
+ *     MyType el1 = *(*((MyType **)e1));
+ *     MyType el2 = *(*((MyType **)e2));
+ *
+ *     if (el1 < el2) return -1;
+ *     if (el1 > el2) return 1;
+ *     return 0;
+ * }
+ *
+ * ...
+ *
+ * cc_array_sort(array, mycmp);
+ * @endcode
+ *
+ * @param[in] ar array to be sorted
+ * @param[in] cmp the comparator function that must be of type
+ *                int cmp(const void *e1, const void *e2), returning
+ *                < 0 if the first element goes before the second,
+ *                0 if the elements are equal, and > 0 if the second goes
+ *                before the first
+ */
+void cc_array_sort(CC_Array *ar, int (*cmp)(const void *, const void *)) {
+    qsort(ar->buffer, ar->size, sizeof(void *), cmp);
+}
+
+/**
+ * Expands the CC_Array capacity. This might fail if the new buffer
+ * cannot be allocated. In case the expansion would overflow the index
+ * range, a maximum capacity buffer is allocated instead. If the capacity
+ * is already at the maximum capacity, no new buffer is allocated.
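+ *
+ * (Illustrative note: the overflow check relies on the fact that once
+ * capacity * exp_factor no longer fits in a size_t, the truncated
+ * new_capacity can no longer exceed the current capacity, so
+ * new_capacity <= capacity signals overflow and the capacity is clamped
+ * to CC_MAX_ELEMENTS.)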
+ *
+ * @param[in] ar array whose capacity is being expanded
+ *
+ * @return CC_OK if the buffer was expanded successfully, CC_ERR_ALLOC if
+ * the memory allocation for the new buffer failed, or CC_ERR_MAX_CAPACITY
+ * if the array is already at maximum capacity.
+ */
+static enum cc_stat expand_array_capacity(CC_Array *ar) {
+    if (ar->capacity == CC_MAX_ELEMENTS) {
+        return CC_ERR_MAX_CAPACITY;
+    }
+
+    size_t new_capacity = (size_t)(ar->capacity * ar->exp_factor);
+
+    /* As long as the capacity is greater than the expansion factor
+     * at the point of overflow, this check is valid. */
+    if (new_capacity <= ar->capacity) {
+        ar->capacity = CC_MAX_ELEMENTS;
+    } else {
+        ar->capacity = new_capacity;
+    }
+
+    void **new_buff = (void **)ar->mem_alloc(ar->capacity * sizeof(void *));
+
+    if (!new_buff) {
+        return CC_ERR_ALLOC;
+    }
+
+    memcpy(new_buff, ar->buffer, ar->size * sizeof(void *));
+
+    ar->mem_free(ar->buffer);
+    ar->buffer = new_buff;
+
+    return CC_OK;
+}
+
+/**
+ * Applies the function fn to each element of the CC_Array.
+ *
+ * @param[in] ar array on which this operation is performed
+ * @param[in] fn operation function that is to be invoked on each CC_Array
+ *               element
+ */
+void cc_array_map(CC_Array *ar, void (*fn)(void *e)) {
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        fn(ar->buffer[i]);
+    }
+}
+
+/**
+ * A fold/reduce function that collects all of the elements in the array
+ * together. For example, if we have an array of [a,b,c...], the end result
+ * will be (...((a+b)+c)+...).
+ *
+ * @param[in] ar the array on which this operation is performed
+ * @param[in] fn the operation function that is to be invoked on each array
+ *               element
+ * @param[in] result the pointer which will collect the end result
+ */
+void cc_array_reduce(CC_Array *ar, void (*fn)(void *, void *, void *), void *result) {
+    if (ar->size == 1) {
+        fn(ar->buffer[0], NULL, result);
+        return;
+    }
+    if (ar->size > 1) {
+        fn(ar->buffer[0], ar->buffer[1], result);
+    }
+
+    for (size_t i = 2; i < ar->size; i++) {
+        fn(result, ar->buffer[i], result);
+    }
+}
+
+/**
+ * Initializes the iterator.
+ *
+ * @param[in] iter the iterator that is being initialized
+ * @param[in] ar the array to iterate over
+ */
+void cc_array_iter_init(CC_ArrayIter *iter, CC_Array *ar) {
+    iter->ar = ar;
+    iter->index = 0;
+    iter->last_removed = false;
+}
+
+/**
+ * Advances the iterator and sets the out parameter to the value of the
+ * next element in the sequence.
+ *
+ * @param[in] iter the iterator that is being advanced
+ * @param[out] out pointer to where the next element is set
+ *
+ * @return CC_OK if the iterator was advanced, or CC_ITER_END if the
+ * end of the CC_Array has been reached.
+ */
+enum cc_stat cc_array_iter_next(CC_ArrayIter *iter, void **out) {
+    if (iter->index >= iter->ar->size) {
+        return CC_ITER_END;
+    }
+
+    *out = iter->ar->buffer[iter->index];
+
+    iter->index++;
+    iter->last_removed = false;
+
+    return CC_OK;
+}
+
+/**
+ * Removes the element last returned by cc_array_iter_next()
+ * without invalidating the iterator, and optionally sets the out
+ * parameter to the value of the removed element.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[out] out pointer to where the removed element is stored, or NULL
+ *                 if it is to be ignored
+ *
+ * @return CC_OK if the element was successfully removed, or
+ * CC_ERR_VALUE_NOT_FOUND.
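+ *
+ * For example, a minimal sketch (assuming ar is an existing CC_Array)
+ * that removes every NULL entry while iterating:
+ *
+ * @code
+ * CC_ArrayIter iter;
+ * cc_array_iter_init(&iter, ar);
+ *
+ * void *e;
+ * while (cc_array_iter_next(&iter, &e) != CC_ITER_END) {
+ *     if (e == NULL) {
+ *         cc_array_iter_remove(&iter, NULL);
+ *     }
+ * }
+ * @endcode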
+ */
+enum cc_stat cc_array_iter_remove(CC_ArrayIter *iter, void **out) {
+    enum cc_stat status = CC_ERR_VALUE_NOT_FOUND;
+
+    if (!iter->last_removed) {
+        status = cc_array_remove_at(iter->ar, iter->index - 1, out);
+        if (status != CC_OK) {
+            return status;
+        }
+
+        iter->last_removed = true;
+        if (iter->index > 0) {
+            iter->index--;
+        }
+    }
+    return status;
+}
+
+/**
+ * Removes the element last returned by cc_array_iter_next()
+ * without invalidating the iterator, and optionally sets the out
+ * parameter to the value of the removed element. The order of the array
+ * is not preserved: the last element of the array is moved to the index
+ * of the removed element and then dropped from the end.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[out] out pointer to where the removed element is stored, or NULL
+ *                 if it is to be ignored
+ *
+ * @return CC_OK if the element was successfully removed, or
+ * CC_ERR_VALUE_NOT_FOUND.
+ */
+enum cc_stat cc_array_iter_remove_fast(CC_ArrayIter *iter, void **out) {
+    enum cc_stat status = CC_ERR_VALUE_NOT_FOUND;
+
+    if (!iter->last_removed) {
+        status = cc_array_remove_fast_at(iter->ar, iter->index - 1, out);
+        if (status != CC_OK) {
+            return status;
+        }
+
+        iter->last_removed = true;
+        if (iter->index > 0) {
+            iter->index--;
+        }
+    }
+    return status;
+}
+
+/**
+ * Adds a new element to the CC_Array after the element last returned by
+ * cc_array_iter_next(), without invalidating the iterator.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[in] element the element being added
+ *
+ * @return CC_OK if the element was successfully added, CC_ERR_ALLOC if the
+ * memory allocation for the new element failed, or CC_ERR_MAX_CAPACITY if
+ * the array is already at maximum capacity.
+ */
+enum cc_stat cc_array_iter_add(CC_ArrayIter *iter, void *element) {
+    return cc_array_add_at(iter->ar, element, iter->index++);
+}
+
+/**
+ * Replaces the element last returned by cc_array_iter_next()
+ * with the specified element, and optionally sets the out parameter to
+ * the value of the replaced element.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[in] element the replacement element
+ * @param[out] out pointer to where the replaced element is stored, or NULL
+ *                 if it is to be ignored
+ *
+ * @return CC_OK if the element was replaced successfully, or
+ * CC_ERR_OUT_OF_RANGE.
+ */
+enum cc_stat cc_array_iter_replace(CC_ArrayIter *iter, void *element, void **out) {
+    return cc_array_replace_at(iter->ar, element, iter->index - 1, out);
+}
+
+/**
+ * Returns the index of the element last returned by cc_array_iter_next().
+ *
+ * @note
+ * This function should not be called before a call to cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ *
+ * @return the index.
+ */
+size_t cc_array_iter_index(CC_ArrayIter *iter) {
+    return iter->index - 1;
+}
+
+/**
+ * Initializes the zip iterator.
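+ *
+ * For example, a minimal sketch (assuming ar1 and ar2 are existing
+ * CC_Arrays) that iterates over the element pairs of the two arrays:
+ *
+ * @code
+ * CC_ArrayZipIter zip;
+ * cc_array_zip_iter_init(&zip, ar1, ar2);
+ *
+ * void *e1, *e2;
+ * while (cc_array_zip_iter_next(&zip, &e1, &e2) != CC_ITER_END) {
+ *     // use e1 and e2 ...
+ * }
+ * @endcode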
+ *
+ * @param[in] iter iterator that is being initialized
+ * @param[in] ar1 first array
+ * @param[in] ar2 second array
+ */
+void cc_array_zip_iter_init(CC_ArrayZipIter *iter, CC_Array *ar1, CC_Array *ar2) {
+    iter->ar1 = ar1;
+    iter->ar2 = ar2;
+    iter->index = 0;
+    iter->last_removed = false;
+}
+
+/**
+ * Outputs the next element pair in the sequence and advances the iterator.
+ *
+ * @param[in] iter iterator that is being advanced
+ * @param[out] out1 output of the first array element
+ * @param[out] out2 output of the second array element
+ *
+ * @return CC_OK if a next element pair is returned, or CC_ITER_END if the end of one
+ * of the arrays has been reached.
+ */
+enum cc_stat cc_array_zip_iter_next(CC_ArrayZipIter *iter, void **out1, void **out2) {
+    if (iter->index >= iter->ar1->size || iter->index >= iter->ar2->size) {
+        return CC_ITER_END;
+    }
+
+    *out1 = iter->ar1->buffer[iter->index];
+    *out2 = iter->ar2->buffer[iter->index];
+
+    iter->index++;
+    iter->last_removed = false;
+
+    return CC_OK;
+}
+
+/**
+ * Removes and outputs the element pair last returned by cc_array_zip_iter_next(),
+ * without invalidating the iterator.
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ * @param[out] out1 output of the removed element from the first array
+ * @param[out] out2 output of the removed element from the second array
+ *
+ * @return CC_OK if the element pair was successfully removed, CC_ERR_OUT_OF_RANGE if the
+ * state of the iterator is invalid, or CC_ERR_VALUE_NOT_FOUND if the element pair was
+ * already removed.
+ */
+enum cc_stat cc_array_zip_iter_remove(CC_ArrayZipIter *iter, void **out1, void **out2) {
+    if ((iter->index - 1) >= iter->ar1->size || (iter->index - 1) >= iter->ar2->size) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    if (!iter->last_removed) {
+        cc_array_remove_at(iter->ar1, iter->index - 1, out1);
+        cc_array_remove_at(iter->ar2, iter->index - 1, out2);
+        iter->last_removed = true;
+        return CC_OK;
+    }
+    return CC_ERR_VALUE_NOT_FOUND;
+}
+
+/**
+ * Adds a new element pair to the arrays after the element pair last returned by
+ * cc_array_zip_iter_next(), and immediately before the element pair that would
+ * be returned by a subsequent call to cc_array_zip_iter_next(), without
+ * invalidating the iterator.
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ * @param[in] e1 element added to the first array
+ * @param[in] e2 element added to the second array
+ *
+ * @return CC_OK if the element pair was successfully added to the arrays, or
+ * CC_ERR_ALLOC if the memory allocation for the new elements failed.
+ */
+enum cc_stat cc_array_zip_iter_add(CC_ArrayZipIter *iter, void *e1, void *e2) {
+    size_t index = iter->index++;
+    CC_Array *ar1 = iter->ar1;
+    CC_Array *ar2 = iter->ar2;
+
+    /* Make sure both array buffers have room */
+    if ((ar1->size == ar1->capacity && (expand_array_capacity(ar1) != CC_OK)) ||
+        (ar2->size == ar2->capacity && (expand_array_capacity(ar2) != CC_OK))) {
+        return CC_ERR_ALLOC;
+    }
+
+    cc_array_add_at(ar1, e1, index);
+    cc_array_add_at(ar2, e2, index);
+
+    return CC_OK;
+}
+
+/**
+ * Replaces the element pair last returned by cc_array_zip_iter_next()
+ * with the specified replacement element pair.
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ * @param[in] e1 first array's replacement element
+ * @param[in] e2 second array's replacement element
+ * @param[out] out1 output of the replaced element from the first array
+ * @param[out] out2 output of the replaced element from the second array
+ *
+ * @return CC_OK if the element was successfully replaced, or CC_ERR_OUT_OF_RANGE.
+ */
+enum cc_stat cc_array_zip_iter_replace(CC_ArrayZipIter *iter, void *e1, void *e2, void **out1, void **out2) {
+    if ((iter->index - 1) >= iter->ar1->size || (iter->index - 1) >= iter->ar2->size) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    cc_array_replace_at(iter->ar1, e1, iter->index - 1, out1);
+    cc_array_replace_at(iter->ar2, e2, iter->index - 1, out2);
+
+    return CC_OK;
+}
+
+/**
+ * Returns the index of the element pair last returned by cc_array_zip_iter_next().
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ *
+ * @return current iterator index.
+ */
+size_t cc_array_zip_iter_index(CC_ArrayZipIter *iter) {
+    return iter->index - 1;
+}
+
+size_t cc_array_struct_size() {
+    return sizeof(CC_Array);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendor/cc_common.h b/vendor/cc_common.h
new file mode 100644
index 0000000000..1740460646
--- /dev/null
+++ b/vendor/cc_common.h
@@ -0,0 +1,75 @@
+/*
+ * Collections-C
+ * Copyright (C) 2013-2014 Srđan Panić
+ *
+ * This file is part of Collections-C.
+ *
+ * Collections-C is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Collections-C is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Collections-C. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CC_COMMON_H
+#define CC_COMMON_H
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef ARCH_64
+#define MAX_POW_TWO (((size_t)1) << 63)
+#else
+#define MAX_POW_TWO (((size_t)1) << 31)
+#endif /* ARCH_64 */
+
+enum cc_stat {
+    CC_OK = 0,
+
+    CC_ERR_ALLOC = 1,
+    CC_ERR_INVALID_CAPACITY = 2,
+    CC_ERR_INVALID_RANGE = 3,
+    CC_ERR_MAX_CAPACITY = 4,
+    CC_ERR_KEY_NOT_FOUND = 6,
+    CC_ERR_VALUE_NOT_FOUND = 7,
+    CC_ERR_OUT_OF_RANGE = 8,
+
+    CC_ITER_END = 9,
+};
+
+#define CC_MAX_ELEMENTS ((size_t)-2)
+
+#if defined(_MSC_VER)
+
+#define INLINE __inline
+#define FORCE_INLINE __forceinline
+
+#else
+
+#define INLINE inline
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+#endif /* _MSC_VER */
+
+int cc_common_cmp_str(const void *key1, const void *key2);
+
+#define CC_CMP_STRING cc_common_cmp_str
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendor/dlmalloc.h b/vendor/dlmalloc.h
new file mode 100644
index 0000000000..4ef7c9cfd5
--- /dev/null
+++ b/vendor/dlmalloc.h
@@ -0,0 +1,6264 @@
+/*
+  Default header file for malloc-2.7.2, written by Doug Lea
+  and released to the public domain. Use, modify, and redistribute
+  this code without permission or acknowledgement in any way you wish.
+  Send questions, comments, complaints, performance data, etc to
+  dl@cs.oswego.edu.
+
+  last update: Sun Feb 25 18:38:11 2001  Doug Lea  (dl at gee)
+
+  This header is for ANSI C/C++ only. You can set either of
+  the following #defines before including:
+
+  * If USE_DL_PREFIX is defined, it is assumed that malloc.c
+    was also compiled with this option, so all routines
+    have names starting with "dl".
+
+  * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this
+    file will be #included AFTER <malloc.h>. This is needed only if
+    your system defines a struct mallinfo that is incompatible with the
+    standard one declared here. Otherwise, you can include this file
+    INSTEAD of your system <malloc.h>. At least on ANSI, all declarations
+    should be compatible with system versions.
+*/
+
+#ifndef MALLOC_270_H
+#define MALLOC_270_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> /* for size_t */
+
+#define USE_DL_PREFIX
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
+  null if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. The minimum size
+  is 16 bytes on most 32bit systems, and either 24 or 32 bytes on
+  64bit systems, depending on internal size and alignment restrictions.
+
+  On most systems, size_t is an unsigned type. Calls with values of n
+  that appear "negative" when signed are interpreted as requests for
+  huge amounts of space, which will most often fail.
+
+  The maximum allowed value of n differs across systems, but is in all
+  cases less (typically by 8K) than the maximum representable value of
+  a size_t. Requests greater than this value result in failure.
+*/
+
+#ifndef USE_DL_PREFIX
+void *malloc(size_t);
+#else
+void *dlmalloc(size_t);
+#endif
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (and bad!)
+  effects if p has already been freed or was not obtained via malloc.
+
+  Unless disabled using mallopt, freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+#ifndef USE_DL_PREFIX
+void free(void *);
+#else
+void dlfree(void *);
+#endif
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+#ifndef USE_DL_PREFIX
+void *calloc(size_t, size_t);
+#else
+void *dlcalloc(size_t, size_t);
+#endif
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI), and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible. Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
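+
+  For example, a minimal caller-side sketch (illustrative, not part of
+  this API; handle_oom is a hypothetical error handler): because a
+  failed realloc leaves p allocated, assign through a temporary so the
+  original block is not leaked:
+
+    void* tmp = realloc(p, n);
+    if (tmp == 0)
+      handle_oom();   // p is still valid and still owned by the caller
+    else
+      p = tmp;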
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+
+#ifndef USE_DL_PREFIX
+void *realloc(void *, size_t);
+#else
+void *dlrealloc(void *, size_t);
+#endif
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+
+#ifndef USE_DL_PREFIX
+void *memalign(size_t, size_t);
+#else
+void *dlmemalign(size_t, size_t);
+#endif
+
+/*
+  valloc(size_t n);
+  Allocates a page-aligned chunk of at least n bytes.
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+
+#ifndef USE_DL_PREFIX
+void *valloc(size_t);
+#else
+void *dlvalloc(size_t);
+#endif
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements, each of which can hold contents of size
+  elem_size. Each element starts out cleared, and can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed. If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use regular calloc and assign pointers into this
+  space to represent elements. (In this case though, you cannot
+  independently free elements.)
+
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools. It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+    struct Node { int item; struct Node* next; };
+
+    struct Node* build_list() {
+      struct Node** pool;
+      int i;
+      int n = read_number_of_nodes_needed();
+      if (n <= 0) return 0;
+      pool = (struct Node**) independent_calloc(n, sizeof(struct Node), 0);
+      if (pool == 0) return 0; // failure
+      // organize into a linked list...
+      struct Node* first = pool[0];
+      for (i = 0; i < n-1; ++i)
+        pool[i]->next = pool[i+1];
+      free(pool); // Can now free the array (or not, if it is needed later)
+      return first;
+    }
+*/
+
+#ifndef USE_DL_PREFIX
+void **independent_calloc(size_t, size_t, void **);
+#else
+void **dlindependent_calloc(size_t, size_t, void **);
+#endif
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array. It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed. If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use a single regular malloc, and assign pointers at
+  particular offsets in the aggregate space. (In this case though, you
+  cannot independently free elements.)
+
+  independent_comalloc differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time. For example:
+
+    struct Head { ... };
+    struct Foot { ... };
+
+    void send_message(char* msg) {
+      int msglen = strlen(msg);
+      size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+      void* chunks[3];
+      if (independent_comalloc(3, sizes, chunks) == 0)
+        die();
+      struct Head* head = (struct Head*)(chunks[0]);
+      char* body = (char*)(chunks[1]);
+      struct Foot* foot = (struct Foot*)(chunks[2]);
+      // ...
+    }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from a series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+
+#ifndef USE_DL_PREFIX
+void **independent_comalloc(size_t, size_t *, void **);
+#else
+void **dlindependent_comalloc(size_t, size_t *, void **);
+#endif
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  rounds up n to the nearest pagesize.
+ */
+
+#ifndef USE_DL_PREFIX
+void *pvalloc(size_t);
+#else
+void *dlpvalloc(size_t);
+#endif
+
+/*
+  cfree(void* p);
+  Equivalent to free(p).
+
+  cfree is needed/defined on some systems that pair it with calloc,
+  for odd historical reasons (such as: cfree is used in example
+  code in the first edition of K&R).
+*/
+
+#ifndef USE_DL_PREFIX
+void cfree(void *);
+#else
+void dlcfree(void *);
+#endif
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+
+#ifndef USE_DL_PREFIX
+int malloc_trim(size_t);
+#else
+int dlmalloc_trim(size_t);
+#endif
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in an allocated
+  chunk, which may be more than you requested (although often not) due
+  to alignment and minimum size constraints. You can use this many
+  bytes without worrying about overwriting other allocated
+  objects. This is not a particularly great programming practice. But
+  malloc_usable_size can be more useful in debugging and assertions,
+  for example:
+
+    p = malloc(n);
+    assert(malloc_usable_size(p) >= 256);
+*/
+
+#ifndef USE_DL_PREFIX
+size_t malloc_usable_size(void *);
+#else
+size_t dlmalloc_usable_size(void *);
+#endif
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+*/
+
+#ifndef USE_DL_PREFIX
+void malloc_stats(void);
+#else
+void dlmalloc_stats(void);
+#endif
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+               have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+               than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  The names of some of these fields don't bear much relation to their
+  contents because this struct was defined as standard in SVID/XPG so
+  reflects the malloc implementation that was then used
+  in SystemV Unix.
+
+  The original SVID version of this struct, defined on most systems
+  with mallinfo, declares all fields as ints. But some others define
+  them as unsigned long. If your system defines the fields using a type
+  of different width than listed here, you should #include your system
+  version before including this file. The struct declaration is
+  suppressed if _MALLOC_H is defined (which is done in most system
+  malloc.h files). You can also suppress it by defining
+  HAVE_USR_INCLUDE_MALLOC_H.
+
+  Because these fields are ints, but internal bookkeeping is done with
+  unsigned longs, the reported values may appear as negative, and may
+  wrap around zero and thus be inaccurate.
+*/
+
+#ifndef HAVE_USR_INCLUDE_MALLOC_H
+#ifndef _MALLOC_H
+struct mallinfo {
+    int arena;
+    int ordblks;
+    int smblks;
+    int hblks;
+    int hblkhd;
+    int usmblks;
+    int fsmblks;
+    int uordblks;
+    int fordblks;
+    int keepcost;
+};
+#endif
+#endif
+
+#ifndef USE_DL_PREFIX
+struct mallinfo mallinfo(void);
+#else
+struct mallinfo dlmallinfo(void);
+#endif
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair. mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0. SVID/XPG defines four standard param numbers for mallopt,
+  normally defined in malloc.h. Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details. Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD  -1        128*1024   any   (-1U disables trimming)
+  M_TOP_PAD         -2        0          any
+  M_MMAP_THRESHOLD  -3        128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX        -4        65536      any   (0 disables use of mmap)
+*/
+
+#ifndef USE_DL_PREFIX
+int mallopt(int, int);
+#else
+int dlmallopt(int, int);
+#endif
+
+/* Descriptions of tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default. The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway. Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens, etc.
+  Using fastbins for larger objects normally worsens fragmentation
+  without improving speed.
+
+  You can reduce M_MXFAST to 0 to disable all use of fastbins. This
+  causes the malloc algorithm to be a closer approximation of
+  fifo-best-fit in all cases, not just for larger requests, but will
+  generally cause it to be slower.
+*/
+
+#ifndef M_MXFAST
+#define M_MXFAST 1
+#endif
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values. As a rough guide, you
+  might set it to a value close to the average size of a process
+  (program) running on your system. Releasing this much memory
+  would allow such a process to run in memory. Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all. And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect. To disable trimming completely, you can set it to
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  compiled with TRIM_FASTBINS defined, automatic trimming never takes
+  place upon freeing a chunk with size less than or equal to
+  MXFAST. Trimming is instead delayed until subsequent freeing of
+  larger chunks. However, you can still force an attempted trim by
+  calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD -1
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+    a new malloc request, this much padding is added to the sbrk
+    request.
+
+  * When malloc_trim is called automatically from free(),
+    it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0. However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD -2
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+  1. Mmapped space can ALWAYS be individually released back
+     to the system, which helps keep the system level memory
+     demands of a long-lived program low.
+  2. Mapped memory can never become `locked' between
+     other chunks, as can happen with normally allocated chunks, which
+     means that even trimming via malloc_trim would not release them.
+  3. On some systems with "holes" in address spaces, mmap can obtain
+     memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+  1. The space cannot be reclaimed, consolidated, and then
+     used to service later requests, as happens with normal chunks.
+  2. It can lead to more wastage because of mmap page alignment
+     requirements.
+  3. It causes malloc performance to be more dependent on host
+     system memory management support routines.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems. The
+  default is an empirically derived value that works well in most
+  systems.
+*/
+
+#define M_MMAP_THRESHOLD -3
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests. If
+  mmap is not supported on a system, the default value is 0, and
+  attempts to set it to non-zero values in mallopt will fail.
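+
+  For example, a minimal tuning sketch (illustrative only; the parameter
+  symbols and value meanings are those listed in the mallopt table
+  above):
+
+    mallopt(M_MXFAST, 0);    // disable fastbins entirely
+    mallopt(M_MMAP_MAX, 0);  // never service requests via mmap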
+*/
+
+#define M_MMAP_MAX -4
+
+/* Unused SVID2/XPG mallopt options, listed for completeness */
+
+#ifndef M_NLBLKS
+#define M_NLBLKS 2 /* UNUSED in this malloc */
+#endif
+#ifndef M_GRAIN
+#define M_GRAIN 3 /* UNUSED in this malloc */
+#endif
+#ifndef M_KEEP
+#define M_KEEP 4 /* UNUSED in this malloc */
+#endif
+
+/*
+  Some malloc.h's declare alloca, even though it is not part of malloc.
+*/
+
+#ifndef _ALLOCA_H
+extern void *alloca(size_t);
+#endif
+
+/*
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
+  Doug Lea and released to the public domain. Use, modify, and
+  redistribute this code without permission or acknowledgement in any
+  way you wish. Send questions, comments, complaints, performance
+  data, etc to dl@cs.oswego.edu
+
+* VERSION 2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+
+  Note: There may be an updated version of this malloc obtainable at
+        ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+  Check before installing!
+
+* Quickstart
+
+  This library is all in one file to simplify the most common usage:
+  ftp it, compile it (-O), and link it into another program. All
+  of the compile-time options default to reasonable values for use on
+  most unix platforms. Compile -DWIN32 for reasonable defaults on windows.
+  You might later want to step through various compile-time and dynamic
+  tuning options.
+
+  For convenience, an include file for code using this malloc is at:
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h
+  You don't really need this .h file unless you call functions not
+  defined in your system include files. The .h file contains only the
+  excerpts from this file needed for using this malloc on ANSI C/C++
+  systems, so long as you haven't changed compile-time options about
+  naming and tuning parameters. If you do, then you can create your
+  own malloc.h that does include all settings by cutting at the point
+  indicated below.
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...) functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(Void_t* p);
+    realloc(Void_t* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+    pvalloc(size_t n);
+    cfree(Void_t* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(Void_t* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t representation:        4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated beyond those requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+                           8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit. Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined
+
+       When USE_MALLOC_LOCK is defined, wrappers are created to
+       surround every public call with either a pthread mutex or
+       a win32 spinlock (depending on WIN32). This is not
+       especially fast, and can be a major bottleneck.
+       It is designed only to provide minimal protection
+       in concurrent environments, and to provide a basis for
+       extensions.
+       If you are using malloc in a concurrent program, you would be far
+       better off obtaining ptmalloc, which is derived from a version of
+       this malloc, and is well-tuned for concurrent programs. (See
+       http://www.malloc.de) Note that even when USE_MALLOC_LOCK is
+       defined, you can guarantee full thread-safety only if no threads
+       acquire memory through direct calls to MORECORE or other
+       system-level allocators.
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
+       others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and
+    Linux. It is also reported to work on WIN32 platforms.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C. It is not
+    at all modular. (Sorry!) It uses a lot of macros. To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    __STD_C                    derived from C compiler defines
+    WIN32                      NOT defined
+    HAVE_MEMCPY                defined
+    USE_MEMCPY                 1 if HAVE_MEMCPY is defined
+    HAVE_MMAP                  defined as 1
+    MMAP_CLEARS                1
+    HAVE_MREMAP                0 unless linux defined
+    malloc_getpagesize         derived from system #includes, or 4096 if not
+    HAVE_USR_INCLUDE_MALLOC_H  NOT defined
+    LACKS_UNISTD_H             NOT defined unless WIN32
+    LACKS_SYS_PARAM_H          NOT defined unless WIN32
+    LACKS_SYS_MMAN_H           NOT defined unless WIN32
+    LACKS_FCNTL_H              NOT defined
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
+    PTR_UINT                   unsigned long
+    CHUNK_SIZE_T               unsigned long
+
+    Configuration and functionality options:
+
+    USE_DL_PREFIX              NOT defined
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    DL_DEBUG                   NOT defined
+    REALLOC_ZERO_BYTES_FREES   NOT defined
+    MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
+    TRIM_FASTBINS              0
+    FIRST_SORTED_BIN_SIZE      512
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64
+    DEFAULT_TRIM_THRESHOLD     256 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     256 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.
+*/
+
+/*
+  WIN32 sets up defaults for MS environment and compilers.
+  Otherwise defaults are for unix.
+*/
+
+/* #define WIN32 */
+
+#ifdef WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* Win32 doesn't supply or need the following headers */
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+
+/* Use the supplied emulation of sbrk */
+#define MORECORE sbrk
+#define MORECORE_CONTIGUOUS 1
+#define MORECORE_FAILURE ((void *)(-1))
+
+/* Use the supplied emulation of mmap and munmap */
+#define HAVE_MMAP 1
+#define MUNMAP_FAILURE (-1)
+#define MMAP_CLEARS 1
+
+/* These values don't really matter in windows mmap emulation */
+#define MAP_PRIVATE 1
+#define MAP_ANONYMOUS 2
+#define PROT_READ 1
+#define PROT_WRITE 2
+
+/* Emulation functions defined at the end of this file */
+
+/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */
+#ifdef USE_MALLOC_LOCK
+static int slwait(int *sl);
+static int slrelease(int *sl);
+#endif
+
+static long getpagesize(void);
+static long getregionsize(void);
+static void *sbrk(long size);
+static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg);
+static long munmap(void *ptr, long size);
+
+static void vminfo(unsigned long *free, unsigned long *reserved, unsigned long *committed);
+static int cpuinfo(int whole, unsigned long *kernel, unsigned long *user);
+
+#endif
+
+/*
+  __STD_C should be nonzero if using ANSI-standard C compiler, a C++
+  compiler, or a C compiler sufficiently close to ANSI to get away
+  with it.
+*/
+
+#ifndef __STD_C
+#if defined(__STDC__) || defined(__cplusplus)
+#define __STD_C 1
+#else
+#define __STD_C 0
+#endif
+#endif /*__STD_C*/
+
+/*
+  Void_t* is the pointer type that malloc should say it returns
+*/
+
+#ifndef Void_t
+#if (__STD_C || defined(WIN32))
+#define Void_t void
+#else
+#define Void_t char
+#endif
+#endif /*Void_t*/
+
+#if __STD_C
+#include <stddef.h> /* for size_t */
+#else
+#include <sys/types.h>
+#endif
+
+/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
+
+/* #define LACKS_UNISTD_H */
+
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
+
+/* #define LACKS_SYS_PARAM_H */
+
+#include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */
+#include <stdio.h> /* needed for malloc_stats */
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs. This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DDL_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory. The
+  checking is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with DL_DEBUG set will
+  attempt to check every non-mmapped allocated and free chunk in the
+  course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting DL_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting DL_DEBUG does NOT provide an automated mechanism for checking
+  that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#include <assert.h>
+
+/*
+ The unsigned integer type used for comparing any two chunk sizes.
+ This should be at least as wide as size_t, but should not be signed.
+*/
+
+#ifndef CHUNK_SIZE_T
+#define CHUNK_SIZE_T unsigned long
+#endif
+
+/*
+ The unsigned integer type used to hold addresses when they are
+ manipulated as integers. intptr_t would suffice, except that it is
+ not defined on all systems.
+*/
+#ifndef PTR_UINT
+#define PTR_UINT unsigned long
+#endif
+
+/*
+ INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+ of chunk sizes.
+
+ The default version is the same as size_t.
+
+ While not strictly necessary, it is best to define this as an
+ unsigned type, even if size_t is a signed type. This may avoid some
+ artificial size limitations on some systems.
+
+ On a 64-bit machine, you may be able to reduce malloc overhead by
+ defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+ expense of not being able to handle more than 2^32 of malloced
+ space. If this limitation is acceptable, you are encouraged to set
+ this unless you are on a platform requiring 16-byte alignments. In
+ this case the alignment requirements turn out to negate any
+ potential advantages of decreasing size_t word size.
+
+ Implementors: Beware of the possible combinations of:
+ - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+ and might be the same width as int or as long
+ - size_t might have different width and signedness than INTERNAL_SIZE_T
+ - int and long might be 32 or 64 bits, and might be the same width
+ To deal with this, most comparisons and difference computations
+ among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
+ aware of the fact that casting an unsigned int to a wider long does
+ not sign-extend. (This also makes checking for negative numbers
+ awkward.) Some of these casts result in harmless compiler warnings
+ on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ (sizeof(INTERNAL_SIZE_T))
+
+/*
+ MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+ It must be a power of two at least 2 * SIZE_SZ, even on machines
+ for which smaller alignments would suffice. It may be defined as
+ larger than this though. Note however that code and data structures
+ are optimized for the case of 8-byte alignment.
+*/
+
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT (2 * SIZE_SZ)
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
+
+/*
+ REALLOC_ZERO_BYTES_FREES should be set if a call to
+ realloc with zero bytes should be the same as a call to free.
+ Some people think it should. Otherwise, since this malloc
+ returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+/* #define REALLOC_ZERO_BYTES_FREES */
+
+/*
+ TRIM_FASTBINS controls whether free() of a very small chunk can
+ immediately lead to trimming. Setting to true (1) can reduce memory
+ footprint, but will almost always slow down programs that use a lot
+ of small chunks.
+
+ Define this only if you are willing to give up some speed to more
+ aggressively reduce system-level memory footprint when releasing
+ memory in programs that use many small chunks. You can get
+ essentially the same effect by setting MXFAST to 0, but this can
+ lead to even greater slowdowns in programs using many small chunks.
+ TRIM_FASTBINS is an in-between compile-time option, that disables + only those chunks bordering topmost memory from being placed in + fastbins. +*/ + +#ifndef TRIM_FASTBINS +#define TRIM_FASTBINS 0 +#endif + +/* + USE_DL_PREFIX will prefix all public routines with the string 'dl'. + This is necessary when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. +*/ + +/* #define USE_DL_PREFIX */ + +/* + USE_MALLOC_LOCK causes wrapper functions to surround each + callable routine with pthread mutex lock/unlock. + + USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined +*/ + +/* #define USE_MALLOC_LOCK */ + +/* + If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is + actually a wrapper function that first calls MALLOC_PREACTION, then + calls the internal routine, and follows it with + MALLOC_POSTACTION. This is needed for locking, but you can also use + this, without USE_MALLOC_LOCK, for purposes of interception, + instrumentation, etc. It is a sad fact that using wrappers often + noticeably degrades performance of malloc-intensive programs. +*/ + +#ifdef USE_MALLOC_LOCK +#define USE_PUBLIC_MALLOC_WRAPPERS +#else +/* #define USE_PUBLIC_MALLOC_WRAPPERS */ +#endif + +/* + Two-phase name translation. + All of the actual routines are given mangled names. + When wrappers are used, they become the public callable versions. + When DL_PREFIX is used, the callable names are prefixed. +*/ + +#ifndef USE_PUBLIC_MALLOC_WRAPPERS +#define cALLOc public_cALLOc +#define fREe public_fREe +#define cFREe public_cFREe +#define mALLOc public_mALLOc +#define mEMALIGn public_mEMALIGn +#define rEALLOc public_rEALLOc +#define vALLOc public_vALLOc +#define pVALLOc public_pVALLOc +#define mALLINFo public_mALLINFo +#define mALLOPt public_mALLOPt +#define mTRIm public_mTRIm +#define mSTATs public_mSTATs +#define mUSABLe public_mUSABLe +#define iCALLOc public_iCALLOc +#define iCOMALLOc public_iCOMALLOc +#endif + +#ifdef USE_DL_PREFIX +#define public_cALLOc dlcalloc +#define public_fREe dlfree +#define public_cFREe dlcfree +#define public_mALLOc dlmalloc +#define public_mEMALIGn dlmemalign +#define public_rEALLOc dlrealloc +#define public_vALLOc dlvalloc +#define public_pVALLOc dlpvalloc +#define public_mALLINFo dlmallinfo +#define public_mALLOPt dlmallopt +#define public_mTRIm dlmalloc_trim +#define public_mSTATs dlmalloc_stats +#define public_mUSABLe dlmalloc_usable_size +#define public_iCALLOc dlindependent_calloc +#define public_iCOMALLOc dlindependent_comalloc +#else /* USE_DL_PREFIX */ +#define public_cALLOc calloc +#define public_fREe free +#define public_cFREe cfree +#define public_mALLOc malloc +#define public_mEMALIGn memalign +#define public_rEALLOc realloc +#define public_vALLOc valloc +#define public_pVALLOc pvalloc +#define public_mALLINFo mallinfo +#define public_mALLOPt mallopt +#define public_mTRIm malloc_trim +#define public_mSTATs malloc_stats +#define public_mUSABLe malloc_usable_size +#define public_iCALLOc independent_calloc +#define public_iCOMALLOc independent_comalloc +#endif /* USE_DL_PREFIX */ + +/* + HAVE_MEMCPY should be defined if you are not otherwise using + ANSI STD C, but still have memcpy and memset in your C library + and want to use them in calloc and realloc. Otherwise simple + macro versions are defined below. + + USE_MEMCPY should be defined as 1 if you actually want to + have memset and memcpy called. People report that the macro + versions are faster than libc versions on some systems. 
+ + Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks + (of <= 36 bytes) are manually unrolled in realloc and calloc. +*/ + +#define HAVE_MEMCPY + +#ifndef USE_MEMCPY +#ifdef HAVE_MEMCPY +#define USE_MEMCPY 1 +#else +#define USE_MEMCPY 0 +#endif +#endif + +#if (__STD_C || defined(HAVE_MEMCPY)) + +#ifdef WIN32 +/* On Win32 memset and memcpy are already declared in windows.h */ +#else +#if __STD_C +void *memset(void *, int, size_t); +void *memcpy(void *, const void *, size_t); +#else +Void_t *memset(); +Void_t *memcpy(); +#endif +#endif +#endif + +/* + MALLOC_FAILURE_ACTION is the action to take before "return 0" when + malloc fails to be able to return memory, either because memory is + exhausted or because of illegal arguments. + + By default, sets errno if running on STD_C platform, else does nothing. +*/ + +#ifndef MALLOC_FAILURE_ACTION +#if __STD_C +#define MALLOC_FAILURE_ACTION \ + errno = ENOMEM; + +#else +#define MALLOC_FAILURE_ACTION +#endif +#endif + +/* + MORECORE-related declarations. By default, rely on sbrk +*/ + +#ifdef LACKS_UNISTD_H +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +#if __STD_C +extern Void_t *sbrk(ptrdiff_t); +#else +extern Void_t *sbrk(); +#endif +#endif +#endif + +/* + MORECORE is the name of the routine to call to obtain more memory + from the system. See below for general guidance on writing + alternative MORECORE functions, as well as a version for WIN32 and a + sample version for pre-OSX macos. +*/ + +// #define _GNU_SOURCE +// #include +extern void *sbrk(intptr_t __delta) __THROW; +#define MORECORE sbrk + +/* + MORECORE_FAILURE is the value returned upon failure of MORECORE + as well as mmap. Since it cannot be an otherwise valid memory address, + and must reflect values of standard sys calls, you probably ought not + try to redefine it. +*/ + +#ifndef MORECORE_FAILURE +#define MORECORE_FAILURE (-1) +#endif + +/* + If MORECORE_CONTIGUOUS is true, take advantage of fact that + consecutive calls to MORECORE with positive arguments always return + contiguous increasing addresses. This is true of unix sbrk. Even + if not defined, when regions happen to be contiguous, malloc will + permit allocations spanning regions obtained from different + calls. But defining this when applicable enables some stronger + consistency checks and space efficiencies. +*/ + +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif + +/* + Define MORECORE_CANNOT_TRIM if your version of MORECORE + cannot release space back to the system when given negative + arguments. This is generally necessary only if you are using + a hand-crafted MORECORE function that cannot handle negative arguments. +*/ + +/* #define MORECORE_CANNOT_TRIM */ + +/* + Define HAVE_MMAP as true to optionally make malloc() use mmap() to + allocate very large blocks. These will be returned to the + operating system immediately after a free(). Also, if mmap + is available, it is used as a backup strategy in cases where + MORECORE fails to provide space from system. + + This malloc is best tuned to work with mmap for large requests. + If you do not have mmap, operations involving very large chunks (1MB + or so) may be slower than you'd like. +*/ + +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif + +#if HAVE_MMAP +/* + Standard unix mmap using /dev/zero clears memory so calloc doesn't + need to. 
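+
+ (One illustrative consequence, not spelled out in the original text:
+ when MMAP_CLEARS is 1, a calloc request that ends up serviced by a
+ fresh mmap can skip its explicit zeroing pass, since the kernel
+ already returns zero-filled pages.)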
+*/
+
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif
+
+#else /* no mmap */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 0
+#endif
+#endif
+
+/*
+ MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+ sbrk fails, and mmap is used as a backup (which is done only if
+ HAVE_MMAP). The value must be a multiple of page size. This
+ backup strategy generally applies only when systems have "holes" in
+ address space, so sbrk cannot perform contiguous expansion, but
+ there is still space available on system. On systems for which
+ this is known to be useful (i.e. most linux kernels), this occurs
+ only when programs allocate huge amounts of memory. Between this,
+ and the fact that mmap regions tend to be limited, the size should
+ be large, to avoid too many mmap calls and thus avoid running out
+ of kernel resources.
+*/
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+ Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+ large blocks. This is currently only possible on Linux with
+ kernel versions newer than 1.3.77.
+*/
+
+#ifndef HAVE_MREMAP
+#if defined(linux) && defined(__USE_GNU)
+#define HAVE_MREMAP 1
+#else
+#define HAVE_MREMAP 0
+#endif
+
+#endif /* HAVE_MREMAP */
+
+/*
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. Note that this value is
+ cached during initialization into a field of malloc_state. So even
+ if malloc_getpagesize is a function, it is only called once.
+
+ The following mechanics for getpagesize were adapted from bsd/gnu
+ getpagesize.h. If none of the system-probes here apply, a value of
+ 4096 is used, which should be OK: If they don't apply, then using
+ the actual value probably doesn't impact performance.
+*/
+
+#ifndef malloc_getpagesize
+
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
+#ifndef _SC_PAGE_SIZE
+#define _SC_PAGE_SIZE _SC_PAGESIZE
+#endif
+#endif
+
+#ifdef _SC_PAGE_SIZE
+#define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#else
+#if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+extern size_t getpagesize();
+#define malloc_getpagesize getpagesize()
+#else
+#ifdef WIN32 /* use supplied emulation of getpagesize */
+#define malloc_getpagesize getpagesize()
+#else
+#ifndef LACKS_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifdef EXEC_PAGESIZE
+#define malloc_getpagesize EXEC_PAGESIZE
+#else
+#ifdef NBPG
+#ifndef CLSIZE
+#define malloc_getpagesize NBPG
+#else
+#define malloc_getpagesize (NBPG * CLSIZE)
+#endif
+#else
+#ifdef NBPC
+#define malloc_getpagesize NBPC
+#else
+#ifdef PAGESIZE
+#define malloc_getpagesize PAGESIZE
+#else /* just guess */
+#define malloc_getpagesize (4096)
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+
+/*
+ This version of malloc supports the standard SVID/XPG mallinfo
+ routine that returns a struct containing usage properties and
+ statistics. It should work on any SVID/XPG compliant system that has
+ a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+ install such a thing yourself, cut out the preliminary declarations
+ as described above and below and save them in a malloc.h file. But
+ there's no compelling reason to bother to do this.)
+
+ The main declaration needed is the mallinfo struct that is returned
+ (by-copy) by mallinfo(). The SVID/XPG mallinfo struct contains a
+ bunch of fields that are not even meaningful in this version of
+ malloc.
These fields are instead filled by mallinfo() with
+ other numbers that might be of interest.
+
+ HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+ /usr/include/malloc.h file that includes a declaration of struct
+ mallinfo. If so, it is included; else an SVID2/XPG2 compliant
+ version is declared below. These must be precisely the same for
+ mallinfo() to work. The original SVID version of this struct,
+ defined on most systems with mallinfo, declares all fields as
+ ints. But some others define as unsigned long. If your system
+ defines the fields using a type of different width than listed here,
+ you must #include your system version and #define
+ HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#else
+
+/* SVID2/XPG mallinfo structure */
+
+struct mallinfo {
+    int arena;    /* non-mmapped space allocated from system */
+    int ordblks;  /* number of free chunks */
+    int smblks;   /* number of fastbin blocks */
+    int hblks;    /* number of mmapped regions */
+    int hblkhd;   /* space in mmapped regions */
+    int usmblks;  /* maximum total allocated space */
+    int fsmblks;  /* space available in freed fastbin blocks */
+    int uordblks; /* total allocated space */
+    int fordblks; /* total free space */
+    int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+/*
+ SVID/XPG defines four standard parameter numbers for mallopt,
+ normally defined in malloc.h. Only one of these (M_MXFAST) is used
+ in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+ so setting them has no effect. But this malloc also supports other
+ options in mallopt described below.
+*/
+#endif
+
+/* ---------- description of public routines ------------ */
+
+/*
+ malloc(size_t n)
+ Returns a pointer to a newly allocated chunk of at least n bytes, or null
+ if no space is available. Additionally, on failure, errno is
+ set to ENOMEM on ANSI C systems.
+
+ If n is zero, malloc returns a minimum-sized chunk. (The minimum
+ size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+ systems.) On most systems, size_t is an unsigned type, so calls
+ with negative arguments are interpreted as requests for huge amounts
+ of space, which will often fail. The maximum supported value of n
+ differs across systems, but is in all cases less than the maximum
+ representable value of a size_t.
+*/
+#if __STD_C
+Void_t *public_mALLOc(size_t);
+#else
+Void_t *public_mALLOc();
+#endif
+
+/*
+ free(Void_t* p)
+ Releases the chunk of memory pointed to by p, which had been previously
+ allocated using malloc or a related routine such as realloc.
+ It has no effect if p is null. It can have arbitrary (i.e., bad!)
+ effects if p has already been freed.
+
+ Unless disabled (using mallopt), freeing very large spaces will,
+ when possible, automatically trigger operations that give
+ back unused memory to the system, thus reducing program footprint.
+*/
+#if __STD_C
+void public_fREe(Void_t *);
+#else
+void public_fREe();
+#endif
+
+/*
+ calloc(size_t n_elements, size_t element_size);
+ Returns a pointer to n_elements * element_size bytes, with all locations
+ set to zero.
+*/
+#if __STD_C
+Void_t *public_cALLOc(size_t, size_t);
+#else
+Void_t *public_cALLOc();
+#endif
+
+/*
+ realloc(Void_t* p, size_t n)
+ Returns a pointer to a chunk of size n that contains the same data
+ as does chunk p up to the minimum of (n, p's size) bytes, or null
+ if no space is available.
+
+ The returned pointer may or may not be the same as p. The algorithm
+ prefers extending p when possible, otherwise it employs the
+ equivalent of a malloc-copy-free sequence.
+
+ If p is null, realloc is equivalent to malloc.
+
+ If space is not available, realloc returns null, errno is set (if on
+ ANSI) and p is NOT freed.
+
+ If n is for fewer bytes than already held by p, the newly unused
+ space is lopped off and freed if possible. Unless the #define
+ REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+ zero (re)allocates a minimum-sized chunk.
+ + Large chunks that were internally obtained via mmap will always + be reallocated using malloc-copy-free sequences unless + the system supports MREMAP (currently only linux). + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +#if __STD_C +Void_t *public_rEALLOc(Void_t *, size_t); +#else +Void_t *public_rEALLOc(); +#endif + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +#if __STD_C +Void_t *public_mEMALIGn(size_t, size_t); +#else +Void_t *public_mEMALIGn(); +#endif + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +#if __STD_C +Void_t *public_vALLOc(size_t); +#else +Void_t *public_vALLOc(); +#endif + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. Only one of these (M_MXFAST) is used + in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, + so setting them has no effect. But this malloc also supports four + other options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_MXFAST 1 64 0-80 (0 disables fastbins) + M_TRIM_THRESHOLD -1 256*1024 any (-1U disables trimming) + M_TOP_PAD -2 0 any + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) + M_MMAP_MAX -4 65536 any (0 disables use of mmap) +*/ +#if __STD_C +int public_mALLOPt(int, int); +#else +int public_mALLOPt(); +#endif + +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: the number of fastbin blocks (i.e., small chunks that + have been freed but not use resused or consolidated) + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: total bytes held in fastbin blocks + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. 
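+
+ A usage sketch (illustrative only, not part of the original text):
+
+     struct mallinfo mi = mallinfo();
+     fprintf(stderr, "arena: %d in use: %d free: %d\n",
+             mi.arena, mi.uordblks, mi.fordblks);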
+*/
+#if __STD_C
+struct mallinfo public_mALLINFo(void);
+#else
+struct mallinfo public_mALLINFo();
+#endif
+
+/*
+ independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
+
+ independent_calloc is similar to calloc, but instead of returning a
+ single cleared space, it returns an array of pointers to n_elements
+ independent elements that can hold contents of size elem_size, each
+ of which starts out cleared, and can be independently freed,
+ realloc'ed etc. The elements are guaranteed to be adjacently
+ allocated (this is not guaranteed to occur with multiple callocs or
+ mallocs), which may also improve cache locality in some
+ applications.
+
+ The "chunks" argument is optional (i.e., may be null, which is
+ probably the most typical usage). If it is null, the returned array
+ is itself dynamically allocated and should also be freed when it is
+ no longer needed. Otherwise, the chunks array must be of at least
+ n_elements in length. It is filled in with the pointers to the
+ chunks.
+
+ In either case, independent_calloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and "chunks"
+ is null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be individually freed when it is no longer
+ needed. If you'd like to instead be able to free all at once, you
+ should instead use regular calloc and assign pointers into this
+ space to represent elements. (In this case though, you cannot
+ independently free elements.)
+
+ independent_calloc simplifies and speeds up implementations of many
+ kinds of pools. It may also be useful when constructing large data
+ structures that initially have a fixed number of fixed-sized nodes,
+ but the number is not known at compile time, and some of the nodes
+ may later need to be freed. For example:
+
+ struct Node { int item; struct Node* next; };
+
+ struct Node* build_list() {
+     struct Node** pool;
+     struct Node* first;
+     int n = read_number_of_nodes_needed();
+     int i;
+     if (n <= 0) return 0;
+     pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
+     if (pool == 0) die();
+     // organize into a linked list...
+     first = pool[0];
+     for (i = 0; i < n-1; ++i)
+         pool[i]->next = pool[i+1];
+     free(pool); // Can now free the array (or not, if it is needed later)
+     return first;
+ }
+*/
+#if __STD_C
+Void_t **public_iCALLOc(size_t, size_t, Void_t **);
+#else
+Void_t **public_iCALLOc();
+#endif
+
+/*
+ independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+
+ independent_comalloc allocates, all at once, a set of n_elements
+ chunks with sizes indicated in the "sizes" array. It returns
+ an array of pointers to these elements, each of which can be
+ independently freed, realloc'ed etc. The elements are guaranteed to
+ be adjacently allocated (this is not guaranteed to occur with
+ multiple callocs or mallocs), which may also improve cache locality
+ in some applications.
+
+ The "chunks" argument is optional (i.e., may be null). If it is null
+ the returned array is itself dynamically allocated and should also
+ be freed when it is no longer needed. Otherwise, the chunks array
+ must be of at least n_elements in length. It is filled in with the
+ pointers to the chunks.
+
+ In either case, independent_comalloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and chunks is
+ null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+ + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comallac differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +#if __STD_C +Void_t **public_iCOMALLOc(size_t, size_t *, Void_t **); +#else +Void_t **public_iCOMALLOc(); +#endif + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +#if __STD_C +Void_t *public_pVALLOc(size_t); +#else +Void_t *public_pVALLOc(); +#endif + +/* + cfree(Void_t* p); + Equivalent to free(p). + + cfree is needed/defined on some systems that pair it with calloc, + for odd historical reasons (such as: cfree is used in example + code in the first edition of K&R). +*/ +#if __STD_C +void public_cFREe(Void_t *); +#else +void public_cFREe(); +#endif + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative + arguments to sbrk) if there is unused memory at the `high' end of + the malloc pool. You can call this after freeing large blocks of + memory to potentially reduce the system-level memory requirements + of a program. However, it cannot guarantee to reduce memory. Under + some allocation patterns, some large free blocks of memory will be + locked between two used chunks, so they cannot be given back to + the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, + only the minimum amount of memory to maintain internal data + structures will be left (one page or less). Non-zero arguments + can be supplied to maintain enough trailing space to service + future expected allocations without having to re-obtain memory + from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. + On systems that do not support "negative sbrks", it will always + rreturn 0. +*/ +#if __STD_C +int public_mTRIm(size_t); +#else +int public_mTRIm(); +#endif + +/* + malloc_usable_size(Void_t* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. 
This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); + +*/ +#if __STD_C +size_t public_mUSABLe(Void_t *); +#else +size_t public_mUSABLe(); +#endif + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. + +*/ +#if __STD_C +void public_mSTATs(void); +#else +void public_mSTATs(void); +#endif + +/* mallopt tuning options */ + +/* + M_MXFAST is the maximum request size used for "fastbins", special bins + that hold returned chunks without consolidating their spaces. This + enables future requests for chunks of the same size to be handled + very quickly, but can increase fragmentation, and thus increase the + overall memory footprint of a program. + + This malloc manages fastbins very conservatively yet still + efficiently, so fragmentation is rarely a problem for values less + than or equal to the default. The maximum supported value of MXFAST + is 80. You wouldn't want it any higher than this anyway. Fastbins + are designed especially for use with many small structs, objects or + strings -- the default handles structs/objects/arrays with sizes up + to 16 4byte fields, or small strings representing words, tokens, + etc. Using fastbins for larger objects normally worsens + fragmentation without improving speed. + + M_MXFAST is set in REQUEST size units. It is internally used in + chunksize units, which adds padding and alignment. You can reduce + M_MXFAST to 0 to disable all use of fastbins. This causes the malloc + algorithm to be a closer approximation of fifo-best-fit in all cases, + not just for larger requests, but will generally cause it to be + slower. +*/ + +/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ +#ifndef M_MXFAST +#define M_MXFAST 1 +#endif + +#ifndef DEFAULT_MXFAST +#define DEFAULT_MXFAST 64 +#endif + +/* + M_TRIM_THRESHOLD is the maximum amount of unused top-most memory + to keep before releasing via malloc_trim in free(). + + Automatic trimming is mainly useful in long-lived programs. + Because trimming via sbrk can be slow on some systems, and can + sometimes be wasteful (in cases where programs immediately + afterward allocate more large chunks) the value should be high + enough so that your overall system performance would improve by + releasing this much memory. + + The trim threshold and the mmap control parameters (see below) + can be traded off with one another. Trimming and mmapping are + two different ways of releasing unused memory back to the + system. 
Between these two, it is often possible to keep + system-level demands of a long-lived program down to a bare + minimum. For example, in one test suite of sessions measuring + the XF86 X server on Linux, using a trim threshold of 128K and a + mmap threshold of 192K led to near-minimal long term resource + consumption. + + If you are using this malloc in a long-lived program, it should + pay to experiment with these values. As a rough guide, you + might set to a value close to the average size of a process + (program) running on your system. Releasing this much memory + would allow such a process to run in memory. Generally, it's + worth it to tune for trimming rather tham memory mapping when a + program undergoes phases where several large chunks are + allocated and released in ways that can reuse each other's + storage, perhaps mixed with phases where there are no such + chunks at all. And in well-behaved long-lived programs, + controlling release of large blocks via trimming versus mapping + is usually faster. + + However, in most programs, these parameters serve mainly as + protection against the system-level effects of carrying around + massive amounts of unneeded memory. Since frequent calls to + sbrk, mmap, and munmap otherwise degrade performance, the default + parameters are set to relatively high values that serve only as + safeguards. + + The trim value must be greater than page size to have any useful + effect. To disable trimming completely, you can set to + (unsigned long)(-1) + + Trim settings interact with fastbin (MXFAST) settings: Unless + TRIM_FASTBINS is defined, automatic trimming never takes place upon + freeing a chunk with size less than or equal to MXFAST. Trimming is + instead delayed until subsequent freeing of larger chunks. However, + you can still force an attempted trim by calling malloc_trim. + + Also, trimming is not generally possible in cases where + the main arena is obtained via mmap. + + Note that the trick some people use of mallocing a huge space and + then freeing it at program startup, in an attempt to reserve system + memory, doesn't have the intended effect under automatic trimming, + since that memory will immediately be returned to the system. +*/ + +#define M_TRIM_THRESHOLD -1 + +#ifndef DEFAULT_TRIM_THRESHOLD +#define DEFAULT_TRIM_THRESHOLD (256 * 1024) +#endif + +/* + M_TOP_PAD is the amount of extra `padding' space to allocate or + retain whenever sbrk is called. It is used in two ways internally: + + * When sbrk is called to extend the top of the arena to satisfy + a new malloc request, this much padding is added to the sbrk + request. + + * When malloc_trim is called automatically from free(), + it is used as the `pad' argument. + + In both cases, the actual amount of padding is rounded + so that the end of the arena is always a system page boundary. + + The main reason for using padding is to avoid calling sbrk so + often. Having even a small pad greatly reduces the likelihood + that nearly every malloc request during program start-up (or + after trimming) will invoke sbrk, which needlessly wastes + time. + + Automatic rounding-up to page-size units is normally sufficient + to avoid measurable overhead, so the default is 0. However, in + systems where sbrk is relatively slow, it can pay to increase + this value, at the expense of carrying around more memory than + the program needs. 
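+
+ For example (an illustrative call, not part of the original text), a
+ program that allocates in bursts right after startup might reserve a
+ megabyte of headroom per sbrk with:
+
+     mallopt(M_TOP_PAD, 1024 * 1024);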
+*/
+
+#define M_TOP_PAD -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD (0)
+#endif
+
+/*
+ M_MMAP_THRESHOLD is the request size threshold for using mmap()
+ to service a request. Requests of at least this size that cannot
+ be allocated using already-existing space will be serviced via mmap.
+ (If enough normal freed space already exists it is used instead.)
+
+ Using mmap segregates relatively large chunks of memory so that
+ they can be individually obtained and released from the host
+ system. A request serviced through mmap is never reused by any
+ other request (at least not directly; the system may just so
+ happen to remap successive requests to the same locations).
+
+ Segregating space in this way has the benefits that:
+
+ 1. Mmapped space can ALWAYS be individually released back
+ to the system, which helps keep the system level memory
+ demands of a long-lived program low.
+ 2. Mapped memory can never become `locked' between
+ other chunks, as can happen with normally allocated chunks, which
+ means that even trimming via malloc_trim would not release them.
+ 3. On some systems with "holes" in address spaces, mmap can obtain
+ memory that sbrk cannot.
+
+ However, it has the disadvantages that:
+
+ 1. The space cannot be reclaimed, consolidated, and then
+ used to service later requests, as happens with normal chunks.
+ 2. It can lead to more wastage because of mmap page alignment
+ requirements.
+ 3. It causes malloc performance to be more dependent on host
+ system memory management support routines which may vary in
+ implementation quality and may impose arbitrary
+ limitations. Generally, servicing a request via normal
+ malloc steps is faster than going through a system's mmap.
+
+ The advantages of mmap nearly always outweigh disadvantages for
+ "large" chunks, but the value of "large" varies across systems. The
+ default is an empirically derived value that works well in most
+ systems.
+*/
+
+#define M_MMAP_THRESHOLD -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
+#endif
+
+/*
+ M_MMAP_MAX is the maximum number of requests to simultaneously
+ service using mmap. This parameter exists because some systems
+ have a limited number of internal tables for use by mmap, and
+ using more than a few of them may degrade performance.
+
+ The default is set to a value that serves only as a safeguard.
+ Setting to 0 disables use of mmap for servicing large requests. If
+ HAVE_MMAP is not set, the default value is 0, and attempts to set it
+ to non-zero values in mallopt will fail.
+*/
+
+#define M_MMAP_MAX -4
+
+#ifndef DEFAULT_MMAP_MAX
+#if HAVE_MMAP
+#define DEFAULT_MMAP_MAX (65536)
+#else
+#define DEFAULT_MMAP_MAX (0)
+#endif
+#endif
+
+/*
+ ========================================================================
+ To make a fully customizable malloc.h header file, cut everything
+ above this line, put into file malloc.h, edit to suit, and #include it
+ on the next line, as well as in programs that use this malloc.
+ ======================================================================== +*/ + +/* #include "malloc.h" */ + +/* --------------------- public wrappers ---------------------- */ + +#ifdef USE_PUBLIC_MALLOC_WRAPPERS + +/* Declare all routines as internal */ +#if __STD_C +static Void_t *mALLOc(size_t); +static void fREe(Void_t *); +static Void_t *rEALLOc(Void_t *, size_t); +static Void_t *mEMALIGn(size_t, size_t); +static Void_t *vALLOc(size_t); +static Void_t *pVALLOc(size_t); +static Void_t *cALLOc(size_t, size_t); +static Void_t **iCALLOc(size_t, size_t, Void_t **); +static Void_t **iCOMALLOc(size_t, size_t *, Void_t **); +static void cFREe(Void_t *); +static int mTRIm(size_t); +static size_t mUSABLe(Void_t *); +static void mSTATs(); +static int mALLOPt(int, int); +static struct mallinfo mALLINFo(void); +#else +static Void_t *mALLOc(); +static void fREe(); +static Void_t *rEALLOc(); +static Void_t *mEMALIGn(); +static Void_t *vALLOc(); +static Void_t *pVALLOc(); +static Void_t *cALLOc(); +static Void_t **iCALLOc(); +static Void_t **iCOMALLOc(); +static void cFREe(); +static int mTRIm(); +static size_t mUSABLe(); +static void mSTATs(); +static int mALLOPt(); +static struct mallinfo mALLINFo(); +#endif + +/* + MALLOC_PREACTION and MALLOC_POSTACTION should be + defined to return 0 on success, and nonzero on failure. + The return value of MALLOC_POSTACTION is currently ignored + in wrapper functions since there is no reasonable default + action to take on failure. +*/ + +#ifdef USE_MALLOC_LOCK + +#ifdef WIN32 + +static int mALLOC_MUTEx; +#define MALLOC_PREACTION slwait(&mALLOC_MUTEx) +#define MALLOC_POSTACTION slrelease(&mALLOC_MUTEx) + +#else + +#include + +static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER; + +#define MALLOC_PREACTION pthread_mutex_lock(&mALLOC_MUTEx) +#define MALLOC_POSTACTION pthread_mutex_unlock(&mALLOC_MUTEx) + +#endif /* USE_MALLOC_LOCK */ + +#else + +/* Substitute anything you like for these */ + +#define MALLOC_PREACTION (0) +#define MALLOC_POSTACTION (0) + +#endif + +Void_t *public_mALLOc(size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = mALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +void public_fREe(Void_t *m) { + if (MALLOC_PREACTION != 0) { + return; + } + fREe(m); + if (MALLOC_POSTACTION != 0) { + } +} + +Void_t *public_rEALLOc(Void_t *m, size_t bytes) { + if (MALLOC_PREACTION != 0) { + return 0; + } + m = rEALLOc(m, bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_mEMALIGn(size_t alignment, size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = mEMALIGn(alignment, bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_vALLOc(size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = vALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_pVALLOc(size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = pVALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_cALLOc(size_t n, size_t elem_size) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = cALLOc(n, elem_size); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t **public_iCALLOc(size_t n, size_t elem_size, Void_t **chunks) { + Void_t **m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = iCALLOc(n, elem_size, chunks); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t **public_iCOMALLOc(size_t n, size_t sizes[], Void_t 
**chunks) {
+    Void_t **m;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    m = iCOMALLOc(n, sizes, chunks);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return m;
+}
+
+void public_cFREe(Void_t *m) {
+    if (MALLOC_PREACTION != 0) {
+        return;
+    }
+    cFREe(m);
+    if (MALLOC_POSTACTION != 0) {
+    }
+}
+
+int public_mTRIm(size_t s) {
+    int result;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    result = mTRIm(s);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return result;
+}
+
+size_t public_mUSABLe(Void_t *m) {
+    size_t result;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    result = mUSABLe(m);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return result;
+}
+
+void public_mSTATs() {
+    if (MALLOC_PREACTION != 0) {
+        return;
+    }
+    mSTATs();
+    if (MALLOC_POSTACTION != 0) {
+    }
+}
+
+struct mallinfo public_mALLINFo() {
+    struct mallinfo m;
+    if (MALLOC_PREACTION != 0) {
+        struct mallinfo nm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+        return nm;
+    }
+    m = mALLINFo();
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return m;
+}
+
+int public_mALLOPt(int p, int v) {
+    int result;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    result = mALLOPt(p, v);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return result;
+}
+
+#endif
+
+/* ------------- Optional versions of memcopy ---------------- */
+
+#if USE_MEMCPY
+
+/*
+ Note: memcpy is ONLY invoked with non-overlapping regions,
+ so the (usually slower) memmove is not needed.
+*/
+
+#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes)
+#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes)
+
+#else /* !USE_MEMCPY */
+
+/* Use Duff's device for good zeroing/copying performance. */
+
+#define MALLOC_ZERO(charp, nbytes)                               \
+    do {                                                         \
+        INTERNAL_SIZE_T *mzp = (INTERNAL_SIZE_T *)(charp);       \
+        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
+        long mcn;                                                \
+        if (mctmp < 8)                                           \
+            mcn = 0;                                             \
+        else {                                                   \
+            mcn = (mctmp - 1) / 8;                               \
+            mctmp %= 8;                                          \
+        }                                                        \
+        switch (mctmp) {                                         \
+        case 0:                                                  \
+            for (;;) {                                           \
+                *mzp++ = 0;                                      \
+            case 7:                                              \
+                *mzp++ = 0;                                      \
+            case 6:                                              \
+                *mzp++ = 0;                                      \
+            case 5:                                              \
+                *mzp++ = 0;                                      \
+            case 4:                                              \
+                *mzp++ = 0;                                      \
+            case 3:                                              \
+                *mzp++ = 0;                                      \
+            case 2:                                              \
+                *mzp++ = 0;                                      \
+            case 1:                                              \
+                *mzp++ = 0;                                      \
+                if (mcn <= 0)                                    \
+                    break;                                       \
+                mcn--;                                           \
+            }                                                    \
+        }                                                        \
+    } while (0)
+
+#define MALLOC_COPY(dest, src, nbytes)                           \
+    do {                                                         \
+        INTERNAL_SIZE_T *mcsrc = (INTERNAL_SIZE_T *)src;         \
+        INTERNAL_SIZE_T *mcdst = (INTERNAL_SIZE_T *)dest;        \
+        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
+        long mcn;                                                \
+        if (mctmp < 8)                                           \
+            mcn = 0;                                             \
+        else {                                                   \
+            mcn = (mctmp - 1) / 8;                               \
+            mctmp %= 8;                                          \
+        }                                                        \
+        switch (mctmp) {                                         \
+        case 0:                                                  \
+            for (;;) {                                           \
+                *mcdst++ = *mcsrc++;                             \
+            case 7:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 6:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 5:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 4:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 3:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 2:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 1:                                              \
+                *mcdst++ = *mcsrc++;                             \
+                if (mcn <= 0)                                    \
+                    break;                                       \
+                mcn--;                                           \
+            }                                                    \
+        }                                                        \
+    } while (0)
+
+#endif
+
+/* ------------------ MMAP support ------------------ */
+
+#if HAVE_MMAP
+
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#ifndef LACKS_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/*
+ Nearly all versions of mmap support MAP_ANONYMOUS,
+ so the following is unlikely to be needed, but is
+ supplied just in case.
+*/
+
+#ifndef MAP_ANONYMOUS
+
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+
+#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \
(dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) \ + : mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) + +#else + +#define MMAP(addr, size, prot, flags) \ + (mmap((addr), (size), (prot), (flags) | MAP_ANONYMOUS, -1, 0)) + +#endif + +#endif /* HAVE_MMAP */ + +/* + ----------------------- Chunk representations ----------------------- +*/ + +/* + This struct declaration is misleading (but accurate and necessary). + It declares a "view" into memory allowing access to necessary + fields at known offsets from a given base. See explanation below. +*/ + +struct malloc_chunk { + + INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */ + INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */ + + struct malloc_chunk *fd; /* double links -- used only if free. */ + struct malloc_chunk *bk; +}; + +typedef struct malloc_chunk *mchunkptr; + +/* + malloc_chunk details: + + (The following includes lightly edited explanations by Colin Plumb.) + + Chunks of memory are maintained using a `boundary tag' method as + described in e.g., Knuth or Standish. (See the paper by Paul + Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a + survey of such techniques.) Sizes of free chunks are stored both + in the front of each chunk and at the end. This makes + consolidating fragmented chunks into bigger chunks very fast. The + size fields also hold bits representing whether chunks are free or + in use. + + An allocated chunk looks like this: + + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk, if allocated | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | User data starts here... . + . . + . (malloc_usable_space() bytes) . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + Where "chunk" is the front of the chunk for the purpose of most of + the malloc code, but "mem" is the pointer that is returned to the + user. "Nextchunk" is the beginning of the next contiguous chunk. + + Chunks always begin on even word boundries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + Free chunks are stored in circular doubly-linked lists, and look like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The P (PREV_INUSE) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. 
If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, + preventing access to non-existent (or non-owned) memory. If + prev_inuse is set for any given chunk, then you CANNOT determine + the size of the previous chunk, and might even get a memory + addressing fault when trying to do so. + + Note that the `foot' of the current chunk is actually represented + as the prev_size of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The two exceptions to all this are + + 1. The special chunk `top' doesn't bother using the + trailing size field since there is no next contiguous chunk + that would have to index off it. After initialization, `top' + is forced to always exist. If it would become less than + MINSIZE bytes long, it is replenished. + + 2. Chunks allocated via mmap, which have the second-lowest-order + bit (IS_MMAPPED) set in their size fields. Because they are + allocated one-by-one, each must contain its own trailing size field. + +*/ + +/* + ---------- Size and alignment checks and conversions ---------- +*/ + +/* conversion from malloc headers to user pointers, and back */ + +#define chunk2mem(p) ((Void_t *)((char *)(p) + 2 * SIZE_SZ)) +#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - 2 * SIZE_SZ)) + +/* The smallest possible chunk */ +#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) + +/* The smallest size we can malloc is an aligned minimal chunk */ + +#define MINSIZE \ + (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) + +/* Check if m has acceptable alignment */ + +#define aligned_OK(m) (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0) + +/* + Check if a request is so large that it would wrap around zero when + padded and aligned. To simplify some other code, the bound is made + low enough so that adding MINSIZE will also not wrap around sero. +*/ + +#define REQUEST_OUT_OF_RANGE(req) \ + ((CHUNK_SIZE_T)(req) >= \ + (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE)) + +/* pad request bytes into a usable size -- internal version */ + +#define request2size(req) \ + (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) + +/* Same, except also perform argument check */ + +#define checked_request2size(req, sz) \ + if (REQUEST_OUT_OF_RANGE(req)) { \ + MALLOC_FAILURE_ACTION; \ + return 0; \ + } \ + (sz) = request2size(req); + +/* + --------------- Physical chunk operations --------------- +*/ + +/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ +#define PREV_INUSE 0x1 + +/* extract inuse bit of previous chunk */ +#define prev_inuse(p) ((p)->size & PREV_INUSE) + +/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ +#define IS_MMAPPED 0x2 + +/* check for mmap()'ed chunk */ +#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) + +/* + Bits to mask off when extracting size + + Note: IS_MMAPPED is intentionally not masked off from size field in + macros for which mmapped chunks should never be seen. This should + cause helpful core dumps to occur if it is tried by accident by + people extending or adapting this malloc. +*/ +#define SIZE_BITS (PREV_INUSE | IS_MMAPPED) + +/* Get size, ignoring use bits */ +#define chunksize(p) ((p)->size & ~(SIZE_BITS)) + +/* Ptr to next physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE))) + +/* Ptr to previous physical malloc_chunk */ +#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_size))) + +/* Treat space at ptr + offset as a chunk */ +#define chunk_at_offset(p, s) ((mchunkptr)(((char *)(p)) + (s))) + +/* extract p's inuse bit */ +#define inuse(p) \ + ((((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE) + +/* set/clear chunk as being inuse without otherwise disturbing */ +#define set_inuse(p) \ + ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE + +#define clear_inuse(p) \ + ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE) + +/* check/set/clear inuse bits in known places */ +#define inuse_bit_at_offset(p, s) \ + (((mchunkptr)(((char *)(p)) + (s)))->size & PREV_INUSE) + +#define set_inuse_bit_at_offset(p, s) \ + (((mchunkptr)(((char *)(p)) + (s)))->size |= PREV_INUSE) + +#define clear_inuse_bit_at_offset(p, s) \ + (((mchunkptr)(((char *)(p)) + (s)))->size &= ~(PREV_INUSE)) + +/* Set size at head, without disturbing its use bit */ +#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s))) + +/* Set size/use field */ +#define set_head(p, s) ((p)->size = (s)) + +/* Set size at footer (only when chunk is not in use) */ +#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_size = (s)) + +/* + -------------------- Internal data structures -------------------- + + All internal state is held in an instance of malloc_state defined + below. There are no other static variables, except in two optional + cases: + * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. + * If HAVE_MMAP is true, but mmap doesn't support + MAP_ANONYMOUS, a dummy file descriptor for mmap. + + Beware of lots of tricks that minimize the total bookkeeping space + requirements. The result is a little over 1K bytes (for 4byte + pointers and size_t.) +*/ + +/* + Bins + + An array of bin headers for free chunks. Each bin is doubly + linked. The bins are approximately proportionally (log) spaced. + There are a lot of these bins (128). This may look excessive, but + works very well in practice. Most bins hold sizes that are + unusual as malloc request sizes, but are more usual for fragments + and consolidated sets of chunks, which is what these bins hold, so + they can be found quickly. All procedures maintain the invariant + that no consolidated chunk physically borders another one, so each + chunk in a list is known to be preceeded and followed by either + inuse chunks or the ends of memory. + + Chunks in bins are kept in size order, with ties going to the + approximately least recently used chunk. Ordering isn't needed + for the small bins, which all contain the same-sized chunks, but + facilitates best-fit allocation for larger chunks. These lists + are just sequential. Keeping them in order almost never requires + enough traversal to warrant using fancier ordered data + structures. + + Chunks of the same size are linked with the most + recently freed at the front, and allocations are taken from the + back. This results in LRU (FIFO) allocation order, which tends + to give each chunk an equal opportunity to be consolidated with + adjacent freed chunks, resulting in larger free chunks and less + fragmentation. + + To simplify use in double-linked lists, each bin header acts + as a malloc_chunk. This avoids special-casing for headers. 
+ But to conserve space and improve locality, we allocate + only the fd/bk pointers of bins, and then use repositioning tricks + to treat these as the fields of a malloc_chunk*. +*/ + +typedef struct malloc_chunk *mbinptr; + +/* addressing -- note that bin_at(0) does not exist */ +#define bin_at(m, i) ((mbinptr)((char *)&((m)->bins[(i) << 1]) - (SIZE_SZ << 1))) + +/* analog of ++bin */ +#define next_bin(b) ((mbinptr)((char *)(b) + (sizeof(mchunkptr) << 1))) + +/* Reminders about list directionality within bins */ +#define first(b) ((b)->fd) +#define last(b) ((b)->bk) + +/* Take a chunk off a bin list */ +#define unlink(P, BK, FD) \ + { \ + FD = P->fd; \ + BK = P->bk; \ + FD->bk = BK; \ + BK->fd = FD; \ + } + +/* + Indexing + + Bins for sizes < 512 bytes contain chunks of all the same size, spaced + 8 bytes apart. Larger bins are approximately logarithmically spaced: + + 64 bins of size 8 + 32 bins of size 64 + 16 bins of size 512 + 8 bins of size 4096 + 4 bins of size 32768 + 2 bins of size 262144 + 1 bin of size what's left + + The bins top out around 1MB because we expect to service large + requests via mmap. +*/ + +#define NBINS 96 +#define NSMALLBINS 32 +#define SMALLBIN_WIDTH 8 +#define MIN_LARGE_SIZE 256 + +#define in_smallbin_range(sz) \ + ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE) + +#define smallbin_index(sz) (((unsigned)(sz)) >> 3) + +/* + Compute index for size. We expect this to be inlined when + compiled with optimization, else not, which works out well. +*/ +static int largebin_index(unsigned int sz) { + unsigned int x = sz >> SMALLBIN_WIDTH; + unsigned int m; /* bit position of highest set bit of m */ + + if (x >= 0x10000) { + return NBINS - 1; + } + + /* On intel, use BSRL instruction to find highest bit */ +#if defined(__GNUC__) && defined(i386) + + __asm__("bsrl %1,%0\n\t" + : "=r"(m) + : "g"(x)); + +#else + { + /* + Based on branch-free nlz algorithm in chapter 5 of Henry + S. Warren Jr's book "Hacker's Delight". + */ + + unsigned int n = ((x - 0x100) >> 16) & 8; + x <<= n; + m = ((x - 0x1000) >> 16) & 4; + n += m; + x <<= m; + m = ((x - 0x4000) >> 16) & 2; + n += m; + x = (x << m) >> 14; + m = 13 - n + (x & ~(x >> 1)); + } +#endif + + /* Use next 2 bits to create finer-granularity bins */ + return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3); +} + +#define bin_index(sz) \ + ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) + +/* + FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the + first bin that is maintained in sorted order. This must + be the smallest size corresponding to a given bin. + + Normally, this should be MIN_LARGE_SIZE. But you can weaken + best fit guarantees to sometimes speed up malloc by increasing value. + Doing this means that malloc may choose a chunk that is + non-best-fitting by up to the width of the bin. + + Some useful cutoff values: + 512 - all bins sorted + 2560 - leaves bins <= 64 bytes wide unsorted + 12288 - leaves bins <= 512 bytes wide unsorted + 65536 - leaves bins <= 4096 bytes wide unsorted + 262144 - leaves bins <= 32768 bytes wide unsorted + -1 - no bins sorted (not recommended!) +*/ + +#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE +/* #define FIRST_SORTED_BIN_SIZE 65536 */ + +/* + Unsorted chunks + + All remainders from chunk splits, as well as all returned chunks, + are first placed in the "unsorted" bin. They are then placed + in regular bins after malloc gives them ONE chance to be used before + binning. 
So, basically, the unsorted_chunks list acts as a queue, + with chunks being placed on it in free (and malloc_consolidate), + and taken off (to be either used or placed in bins) in malloc. +*/ + +/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ +#define unsorted_chunks(M) (bin_at(M, 1)) + +/* + Top + + The top-most available chunk (i.e., the one bordering the end of + available memory) is treated specially. It is never included in + any bin, is used only if no other chunk is available, and is + released back to the system if it is very large (see + M_TRIM_THRESHOLD). Because top initially + points to its own bin with initial zero size, thus forcing + extension on the first malloc request, we avoid having any special + code in malloc to check whether it even exists yet. But we still + need to do so when getting memory from system, so we make + initial_top treat the bin as a legal but unusable chunk during the + interval between initialization and the first call to + sYSMALLOc. (This is somewhat delicate, since it relies on + the 2 preceding words to be zero during this interval as well.) +*/ + +/* Conveniently, the unsorted bin can be used as dummy top on first call */ +#define initial_top(M) (unsorted_chunks(M)) + +/* + Binmap + + To help compensate for the large number of bins, a one-level index + structure is used for bin-by-bin searching. `binmap' is a + bitvector recording whether bins are definitely empty so they can + be skipped over during traversals. The bits are NOT always + cleared as soon as bins are empty, but instead only + when they are noticed to be empty during traversal in malloc. +*/ + +/* Conservatively use 32 bits per map word, even if on 64bit system */ +#define BINMAPSHIFT 5 +#define BITSPERMAP (1U << BINMAPSHIFT) +#define BINMAPSIZE (NBINS / BITSPERMAP) + +#define idx2block(i) ((i) >> BINMAPSHIFT) +#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1)))) + +#define mark_bin(m, i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) +#define unmark_bin(m, i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) +#define get_binmap(m, i) ((m)->binmap[idx2block(i)] & idx2bit(i)) + +/* + Fastbins + + An array of lists holding recently freed small chunks. Fastbins + are not doubly linked. It is faster to single-link them, and + since chunks are never removed from the middles of these lists, + double linking is not necessary. Also, unlike regular bins, they + are not even processed in FIFO order (they use faster LIFO) since + ordering doesn't much matter in the transient contexts in which + fastbins are normally used. + + Chunks in fastbins keep their inuse bit set, so they cannot + be consolidated with other free chunks. malloc_consolidate + releases all chunks in fastbins and consolidates them with + other free chunks. +*/ + +typedef struct malloc_chunk *mfastbinptr; + +/* offset 2 to use otherwise unindexable first 2 bins */ +#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) + +/* The maximum fastbin request size we support */ +#define MAX_FAST_SIZE 80 + +#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE)) + 1) + +/* + FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() + that triggers automatic consolidation of possibly-surrounding + fastbin chunks. This is a heuristic, so the exact value should not + matter too much. It is defined at half the default trim threshold as a + compromise heuristic to only attempt consolidation if it is likely + to lead to trimming. However, it is not dynamically tunable, since + consolidation reduces fragmentation surrounding large chunks even + if trimming is not used. +*/ + +#define FASTBIN_CONSOLIDATION_THRESHOLD \ + ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
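/*
  (Aside: a minimal, self-contained sketch of the singly-linked LIFO
  discipline the fastbins described above use. toy_node, toy_push and
  toy_pop are illustrative names, not part of this file.

      #include <stdio.h>

      struct toy_node { struct toy_node *fd; };   // fastbins link through fd only

      static struct toy_node *toy_bin;            // one fastbin head

      static void toy_push(struct toy_node *p) {  // free side: O(1) push at head
          p->fd = toy_bin;
          toy_bin = p;
      }

      static struct toy_node *toy_pop(void) {     // malloc side: O(1) pop at head
          struct toy_node *p = toy_bin;
          if (p) { toy_bin = p->fd; }
          return p;
      }

      int main(void) {
          struct toy_node a, b;
          toy_push(&a);
          toy_push(&b);
          printf("%d\n", toy_pop() == &b);  // 1: last freed, first reused (LIFO)
          return 0;
      }

  Because chunks are only ever pushed and popped at the head, no bk pointer
  and no mid-list unlink is ever needed, which is why fastbins can stay
  singly linked.)
*/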
+ +/* + Since the lowest 2 bits in max_fast don't matter in size comparisons, + they are used as flags. +*/ + +/* + ANYCHUNKS_BIT held in max_fast indicates that there may be any + freed chunks at all. It is set true when entering a chunk into any + bin. +*/ + +#define ANYCHUNKS_BIT (1U) + +#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT)) +#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT) +#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT) + +/* + FASTCHUNKS_BIT held in max_fast indicates that there are probably + some fastbin chunks. It is set true on entering a chunk into any + fastbin, and cleared only in malloc_consolidate. +*/ + +#define FASTCHUNKS_BIT (2U) + +#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT)) +#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT | ANYCHUNKS_BIT)) +#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT)) + +/* + Set value of max_fast. + Use impossibly small value if 0. +*/ + +#define set_max_fast(M, s) \ + (M)->max_fast = (((s) == 0) ? SMALLBIN_WIDTH : request2size(s)) | \ + ((M)->max_fast & (FASTCHUNKS_BIT | ANYCHUNKS_BIT)) + +#define get_max_fast(M) \ + ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT)) + +/* + morecore_properties is a status word holding dynamically discovered + or controlled properties of the morecore function +*/ + +#define MORECORE_CONTIGUOUS_BIT (1U) + +#define contiguous(M) \ + (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT)) +#define noncontiguous(M) \ + (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0) +#define set_contiguous(M) \ + ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT) +#define set_noncontiguous(M) \ + ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT) + +/* + ----------- Internal state representation and initialization ----------- +*/ + +struct malloc_state { + + /* The maximum chunk size to be eligible for fastbin */ + INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ + + /* Fastbins */ + mfastbinptr fastbins[NFASTBINS]; + + /* Base of the topmost chunk -- not otherwise kept in a bin */ + mchunkptr top; + + /* The remainder from the most recent split of a small request */ + mchunkptr last_remainder; + + /* Normal bins packed as described above */ + mchunkptr bins[NBINS * 2]; + + /* Bitmap of bins. Trailing zero map handles cases of largest binned size */ + unsigned int binmap[BINMAPSIZE + 1]; + + /* Tunable parameters */ + CHUNK_SIZE_T trim_threshold; + INTERNAL_SIZE_T top_pad; + INTERNAL_SIZE_T mmap_threshold; + + /* Memory map support */ + int n_mmaps; + int n_mmaps_max; + int max_n_mmaps; + + /* Cache malloc_getpagesize */ + unsigned int pagesize; + + /* Track properties of MORECORE */ + unsigned int morecore_properties; + + /* Statistics */ + INTERNAL_SIZE_T mmapped_mem; + INTERNAL_SIZE_T sbrked_mem; + INTERNAL_SIZE_T max_sbrked_mem; + INTERNAL_SIZE_T max_mmapped_mem; + INTERNAL_SIZE_T max_total_mem; +}; + +typedef struct malloc_state *mstate; + +/* + There is exactly one instance of this struct in this malloc. + If you are adapting this malloc in a way that does NOT use a static + malloc_state, you MUST explicitly zero-fill it before using. This + malloc relies on the property that malloc_state is initialized to + all zeroes (as is true of C statics). +*/
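/*
  (Aside: a minimal sketch of the bit packing performed by set_max_fast and
  get_max_fast above. Chunk sizes produced by request2size are multiples of
  at least 8 here, so the two low bits of max_fast are always zero and can
  carry ANYCHUNKS_BIT and FASTCHUNKS_BIT; all names below are local to the
  sketch.

      #include <stdio.h>

      int main(void) {
          unsigned long max_fast = 0;
          // set_max_fast: store a size of 72 while preserving the flag bits
          max_fast = 72ul | (max_fast & 3ul);
          // set_fastchunks: FASTCHUNKS_BIT | ANYCHUNKS_BIT
          max_fast |= 3ul;
          printf("size  = %lu\n", max_fast & ~3ul);  // 72 (get_max_fast)
          printf("flags = %lu\n", max_fast & 3ul);   // 3
          return 0;
      }
  )
*/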
+ +static struct malloc_state av_; /* never directly referenced */ + +/* + All uses of av_ are via get_malloc_state(). + At most one "call" to get_malloc_state is made per invocation of + the public versions of malloc and free, but other routines + that in turn invoke malloc and/or free may call more than once. + Also, it is called in check* routines if DL_DEBUG is set. +*/ + +#define get_malloc_state() (&(av_)) + +/* + Initialize a malloc_state struct. + + This is called only from within malloc_consolidate, which needs + to be called in the same contexts anyway. It is never called directly + outside of malloc_consolidate because some optimizing compilers try + to inline it at all call points, which turns out not to be an + optimization at all. (Inlining it in malloc_consolidate is fine though.) +*/ + +#if __STD_C +static void malloc_init_state(mstate av) +#else +static void malloc_init_state(av) mstate av; +#endif +{ + int i; + mbinptr bin; + + /* Establish circular links for normal bins */ + for (i = 1; i < NBINS; ++i) { + bin = bin_at(av, i); + bin->fd = bin->bk = bin; + } + + av->top_pad = DEFAULT_TOP_PAD; + av->n_mmaps_max = DEFAULT_MMAP_MAX; + av->mmap_threshold = DEFAULT_MMAP_THRESHOLD; + av->trim_threshold = DEFAULT_TRIM_THRESHOLD; + +#if MORECORE_CONTIGUOUS + set_contiguous(av); +#else + set_noncontiguous(av); +#endif + + set_max_fast(av, DEFAULT_MXFAST); + + av->top = initial_top(av); + av->pagesize = malloc_getpagesize; +} + +/* + Other internal utilities operating on mstates +*/ + +#if __STD_C +static Void_t *sYSMALLOc(INTERNAL_SIZE_T, mstate); +static int sYSTRIm(size_t, mstate); +static void malloc_consolidate(mstate); +static Void_t **iALLOc(size_t, size_t *, int, Void_t **); +#else +static Void_t *sYSMALLOc(); +static int sYSTRIm(); +static void malloc_consolidate(); +static Void_t **iALLOc(); +#endif + +/* + Debugging support + + These routines make a number of assertions about the states + of data structures that should be true at all times. If any + are not true, it's very likely that a user program has somehow + trashed memory. (It's also possible that there is a coding error + in malloc. In which case, please report it!) +*/ + +#if !DL_DEBUG + +#define check_chunk(P) +#define check_free_chunk(P) +#define check_inuse_chunk(P) +#define check_remalloced_chunk(P, N) +#define check_malloced_chunk(P, N) +#define check_malloc_state() + +#else +#define check_chunk(P) do_check_chunk(P) +#define check_free_chunk(P) do_check_free_chunk(P) +#define check_inuse_chunk(P) do_check_inuse_chunk(P) +#define check_remalloced_chunk(P, N) do_check_remalloced_chunk(P, N) +#define check_malloced_chunk(P, N) do_check_malloced_chunk(P, N) +#define check_malloc_state() do_check_malloc_state() + +/* + Properties of all chunks +*/ + +#if __STD_C +static void do_check_chunk(mchunkptr p) +#else +static void do_check_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + CHUNK_SIZE_T sz = chunksize(p); + /* min and max possible addresses assuming contiguous allocation */ + char *max_address = (char *)(av->top) + chunksize(av->top); + char *min_address = max_address - av->sbrked_mem; + + if (!chunk_is_mmapped(p)) { + + /* Has legal address ... 
*/ + if (p != av->top) { + if (contiguous(av)) { + assert(((char *)p) >= min_address); + assert(((char *)p + sz) <= ((char *)(av->top))); + } + } else { + /* top size is always at least MINSIZE */ + assert((CHUNK_SIZE_T)(sz) >= MINSIZE); + /* top predecessor always marked inuse */ + assert(prev_inuse(p)); + } + } else { +#if HAVE_MMAP + /* address is outside main heap */ + if (contiguous(av) && av->top != initial_top(av)) { + assert(((char *)p) < min_address || ((char *)p) > max_address); + } + /* chunk is page-aligned */ + assert(((p->prev_size + sz) & (av->pagesize - 1)) == 0); + /* mem is aligned */ + assert(aligned_OK(chunk2mem(p))); +#else + /* force an appropriate assert violation if debug set */ + assert(!chunk_is_mmapped(p)); +#endif + } +} + +/* + Properties of free chunks +*/ + +#if __STD_C +static void do_check_free_chunk(mchunkptr p) +#else +static void do_check_free_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE; + mchunkptr next = chunk_at_offset(p, sz); + + do_check_chunk(p); + + /* Chunk must claim to be free ... */ + assert(!inuse(p)); + assert(!chunk_is_mmapped(p)); + + /* Unless a special marker, must have OK fields */ + if ((CHUNK_SIZE_T)(sz) >= MINSIZE) { + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert(aligned_OK(chunk2mem(p))); + /* ... matching footer field */ + assert(next->prev_size == sz); + /* ... and is fully consolidated */ + assert(prev_inuse(p)); + assert(next == av->top || inuse(next)); + + /* ... and has minimally sane links */ + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } else { /* markers are always of size SIZE_SZ */ + assert(sz == SIZE_SZ); + } +} + +/* + Properties of inuse chunks +*/ + +#if __STD_C +static void do_check_inuse_chunk(mchunkptr p) +#else +static void do_check_inuse_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + mchunkptr next; + do_check_chunk(p); + + if (chunk_is_mmapped(p)) { + return; /* mmapped chunks have no next/prev */ + } + + /* Check whether it claims to be in use ... */ + assert(inuse(p)); + + next = next_chunk(p); + + /* ... and is surrounded by OK chunks. + Since more things can be checked with free chunks than inuse ones, + if an inuse chunk borders them and debug is on, it's worth doing them. + */ + if (!prev_inuse(p)) { + /* Note that we cannot even look at prev unless it is not inuse */ + mchunkptr prv = prev_chunk(p); + assert(next_chunk(prv) == p); + do_check_free_chunk(prv); + } + + if (next == av->top) { + assert(prev_inuse(next)); + assert(chunksize(next) >= MINSIZE); + } else if (!inuse(next)) { + do_check_free_chunk(next); + } +} + +/* + Properties of chunks recycled from fastbins +*/ + +#if __STD_C +static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_remalloced_chunk(p, s) mchunkptr p; +INTERNAL_SIZE_T s; +#endif +{ + INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE; + + do_check_inuse_chunk(p); + + /* Legal size ... */ + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert((CHUNK_SIZE_T)(sz) >= MINSIZE); + /* ... 
and alignment */ + assert(aligned_OK(chunk2mem(p))); + /* chunk is less than MINSIZE more than request */ + assert((long)(sz) - (long)(s) >= 0); + assert((long)(sz) - (long)(s + MINSIZE) < 0); +} + +/* + Properties of nonrecycled chunks at the point they are malloced +*/ + +#if __STD_C +static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_malloced_chunk(p, s) mchunkptr p; +INTERNAL_SIZE_T s; +#endif +{ + /* same as recycled case ... */ + do_check_remalloced_chunk(p, s); + + /* + ... plus, must obey implementation invariant that prev_inuse is + always true of any allocated chunk; i.e., that each allocated + chunk borders either a previously allocated and still in-use + chunk, or the base of its memory arena. This is ensured + by making all allocations from the `lowest' part of any found + chunk. This does not necessarily hold however for chunks + recycled via fastbins. + */ + + assert(prev_inuse(p)); +} + +/* + Properties of malloc_state. + + This may be useful for debugging malloc, as well as detecting user + programmer errors that somehow write into malloc_state. + + If you are extending or experimenting with this malloc, you can + probably figure out how to hack this routine to print out or + display chunk addresses, sizes, bins, and other instrumentation. +*/ + +static void do_check_malloc_state(void) { + mstate av = get_malloc_state(); + int i; + mchunkptr p; + mchunkptr q; + mbinptr b; + unsigned int binbit; + int empty; + unsigned int idx; + INTERNAL_SIZE_T size; + CHUNK_SIZE_T total = 0; + int max_fast_bin; + + /* internal size_t must be no wider than pointer type */ + assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char *)); + + /* alignment is a power of 2 */ + assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0); + + /* cannot run remaining checks until fully initialized */ + if (av->top == 0 || av->top == initial_top(av)) { + return; + } + + /* pagesize is a power of 2 */ + assert((av->pagesize & (av->pagesize - 1)) == 0); + + /* properties of fastbins */ + + /* max_fast is in allowed range */ + assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE)); + + max_fast_bin = fastbin_index(av->max_fast); + + for (i = 0; i < NFASTBINS; ++i) { + p = av->fastbins[i]; + + /* all bins past max_fast are empty */ + if (i > max_fast_bin) { + assert(p == 0); + } + + while (p != 0) { + /* each chunk claims to be inuse */ + do_check_inuse_chunk(p); + total += chunksize(p); + /* chunk belongs in this bin */ + assert(fastbin_index(chunksize(p)) == i); + p = p->fd; + } + } + + if (total != 0) { + assert(have_fastchunks(av)); + } else if (!have_fastchunks(av)) { + assert(total == 0); + } + + /* check normal bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av, i); + + /* binmap is accurate (except for bin 1 == unsorted_chunks) */ + if (i >= 2) { + binbit = get_binmap(av, i); + empty = last(b) == b; + if (!binbit) { + assert(empty); + } else if (!empty) { + assert(binbit); + } + } + + for (p = last(b); p != b; p = p->bk) { + /* each chunk claims to be free */ + do_check_free_chunk(p); + size = chunksize(p); + total += size; + if (i >= 2) { + /* chunk belongs in bin */ + idx = bin_index(size); + assert(idx == i); + /* lists are sorted */ + if ((CHUNK_SIZE_T)size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { + assert(p->bk == b || + (CHUNK_SIZE_T)chunksize(p->bk) >= + (CHUNK_SIZE_T)chunksize(p)); + } + } + /* chunk is followed by a legal chain of inuse chunks */ + for (q = next_chunk(p); + (q != av->top && inuse(q) && + (CHUNK_SIZE_T)(chunksize(q)) >= 
MINSIZE); + q = next_chunk(q)) { + do_check_inuse_chunk(q); + } + } + } + + /* top chunk is OK */ + check_chunk(av->top); + + /* sanity checks for statistics */ + + assert(total <= (CHUNK_SIZE_T)(av->max_total_mem)); + assert(av->n_mmaps >= 0); + assert(av->n_mmaps <= av->max_n_mmaps); + + assert((CHUNK_SIZE_T)(av->sbrked_mem) <= + (CHUNK_SIZE_T)(av->max_sbrked_mem)); + + assert((CHUNK_SIZE_T)(av->mmapped_mem) <= + (CHUNK_SIZE_T)(av->max_mmapped_mem)); + + assert((CHUNK_SIZE_T)(av->max_total_mem) >= + (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem)); +} +#endif + +/* ----------- Routines dealing with system allocation -------------- */ + +/* + sysmalloc handles malloc cases requiring more memory from the system. + On entry, it is assumed that av->top does not have enough + space to service request for nb bytes, thus requiring that av->top + be extended or replaced. +*/ + +#if __STD_C +static Void_t *sYSMALLOc(INTERNAL_SIZE_T nb, mstate av) +#else +static Void_t *sYSMALLOc(nb, av) +INTERNAL_SIZE_T nb; +mstate av; +#endif +{ + mchunkptr old_top; /* incoming value of av->top */ + INTERNAL_SIZE_T old_size; /* its size */ + char *old_end; /* its end address */ + + long size; /* arg to first MORECORE or mmap call */ + char *brk; /* return value from MORECORE */ + + long correction; /* arg to 2nd MORECORE call */ + char *snd_brk; /* 2nd return val */ + + INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ + INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ + char *aligned_brk; /* aligned offset into brk */ + + mchunkptr p; /* the allocated/returned chunk */ + mchunkptr remainder; /* remainder from allocation */ + CHUNK_SIZE_T remainder_size; /* its size */ + + CHUNK_SIZE_T sum; /* for updating stats */ + + size_t pagemask = av->pagesize - 1; + + /* + If there is space available in fastbins, consolidate and retry + malloc from scratch rather than getting memory from system. This + can occur only if nb is in smallbin range so we didn't consolidate + upon entry to malloc. It is much easier to handle this case here + than in malloc proper. + */ + + if (have_fastchunks(av)) { + assert(in_smallbin_range(nb)); + malloc_consolidate(av); + return mALLOc(nb - MALLOC_ALIGN_MASK); + } + +#if HAVE_MMAP + + /* + If have mmap, and the request size meets the mmap threshold, and + the system supports mmap, and there are few enough currently + allocated mmapped regions, try to directly map this request + rather than expanding top. + */ + + if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) && + (av->n_mmaps < av->n_mmaps_max)) { + + char *mm; /* return value from mmap call*/ + + /* + Round up size to nearest page. For mmapped chunks, the overhead + is one SIZE_SZ unit larger than for normal chunks, because there + is no following chunk whose prev_size field could be used. + */ + size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask; + + /* Don't try if size wraps around 0 */ + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) { + + mm = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + if (mm != (char *)(MORECORE_FAILURE)) { + + /* + The offset to the start of the mmapped region is stored + in the prev_size field of the chunk. This allows us to adjust + returned start address to meet alignment requirements here + and in memalign(), and still be able to compute proper + address argument for later munmap in free() and realloc(). 
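   (Aside: a self-contained sketch of that bookkeeping, with plain malloc
   standing in for the mmap call and toy names throughout; it assumes the
   user pointer sits two size_t words past the chunk start, as chunk2mem
   does here.

       #include <stdio.h>
       #include <stdint.h>
       #include <stdlib.h>

       #define TOY_ALIGN_MASK 15u          // pretend the alignment is 16

       int main(void) {
           char *mm = malloc(4096);        // stands in for the mapping
           uintptr_t front =
               ((uintptr_t)(mm + 2 * sizeof(size_t))) & TOY_ALIGN_MASK;
           size_t correction = front ? (TOY_ALIGN_MASK + 1) - front : 0;
           char *chunk = mm + correction;  // aligned chunk start
           ((size_t *)chunk)[0] = correction;  // stored where prev_size lives
           // free/realloc can now recover the base address to unmap:
           printf("base recovered: %d\n",
                  chunk - ((size_t *)chunk)[0] == mm);
           free(mm);
           return 0;
       }

   Storing the offset in the otherwise dead prev_size word costs nothing,
   since an mmapped chunk has no physically preceding neighbor.)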
+ */ + + front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK; + if (front_misalign > 0) { + correction = MALLOC_ALIGNMENT - front_misalign; + p = (mchunkptr)(mm + correction); + p->prev_size = correction; + set_head(p, (size - correction) | IS_MMAPPED); + } else { + p = (mchunkptr)mm; + p->prev_size = 0; + set_head(p, size | IS_MMAPPED); + } + + /* update statistics */ + + if (++av->n_mmaps > av->max_n_mmaps) { + av->max_n_mmaps = av->n_mmaps; + } + + sum = av->mmapped_mem += size; + if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) { + av->max_mmapped_mem = sum; + } + sum += av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { + av->max_total_mem = sum; + } + + check_chunk(p); + + return chunk2mem(p); + } + } + } +#endif + + /* Record incoming configuration of top */ + + old_top = av->top; + old_size = chunksize(old_top); + old_end = (char *)(chunk_at_offset(old_top, old_size)); + + brk = snd_brk = (char *)(MORECORE_FAILURE); + + /* + If not the first time through, we require old_size to be + at least MINSIZE and to have prev_inuse set. + */ + + assert((old_top == initial_top(av) && old_size == 0) || + ((CHUNK_SIZE_T)(old_size) >= MINSIZE && + prev_inuse(old_top))); + + /* Precondition: not enough current space to satisfy nb request */ + assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE)); + + /* Precondition: all fastbins are consolidated */ + assert(!have_fastchunks(av)); + + /* Request enough space for nb + pad + overhead */ + + size = nb + av->top_pad + MINSIZE; + + /* + If contiguous, we can subtract out existing space that we hope to + combine with new space. We add it back later only if + we don't actually get contiguous space. + */ + + if (contiguous(av)) { + size -= old_size; + } + + /* + Round to a multiple of page size. + If MORECORE is not contiguous, this ensures that we only call it + with whole-page arguments. And if MORECORE is contiguous and + this is not first time through, this preserves page-alignment of + previous calls. Otherwise, we correct to page-align below. + */ + + size = (size + pagemask) & ~pagemask; + + /* + Don't try to call MORECORE if argument is so big as to appear + negative. Note that since mmap takes size_t arg, it may succeed + below even if we cannot call MORECORE. + */ + + if (size > 0) { + brk = (char *)(MORECORE(size)); + } + + /* + If have mmap, try using it as a backup when MORECORE fails or + cannot be used. This is worth doing on systems that have "holes" in + address space, so sbrk cannot extend to give contiguous space, but + space is available elsewhere. Note that we ignore mmap max count + and threshold limits, since the space will not be used as a + segregated mmap region. + */ + +#if HAVE_MMAP + if (brk == (char *)(MORECORE_FAILURE)) { + + /* Cannot merge with old top, so add its size back in */ + if (contiguous(av)) { + size = (size + old_size + pagemask) & ~pagemask; + } + + /* If we are relying on mmap as backup, then use larger units */ + if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE)) { + size = MMAP_AS_MORECORE_SIZE; + } + + /* Don't try if size wraps around 0 */ + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) { + + brk = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + if (brk != (char *)(MORECORE_FAILURE)) { + + /* We do not need, and cannot use, another sbrk call to find end */ + snd_brk = brk + size; + + /* + Record that we no longer have a contiguous sbrk region. 
+ + After the first time mmap is used as backup, we do not + ever rely on contiguous space since this could incorrectly + bridge regions. + */ + set_noncontiguous(av); + } + } + } +#endif + + if (brk != (char *)(MORECORE_FAILURE)) { + av->sbrked_mem += size; + + /* + If MORECORE extends previous space, we can likewise extend top size. + */ + + if (brk == old_end && snd_brk == (char *)(MORECORE_FAILURE)) { + set_head(old_top, (size + old_size) | PREV_INUSE); + } + + /* + Otherwise, make adjustments: + + * If the first time through or noncontiguous, we need to call sbrk + just to find out where the end of memory lies. + + * We need to ensure that all returned chunks from malloc will meet + MALLOC_ALIGNMENT + + * If there was an intervening foreign sbrk, we need to adjust sbrk + request size to account for the fact that we will not be able to + combine new space with existing space in old_top. + + * Almost all systems internally allocate whole pages at a time, in + which case we might as well use the whole last page of request. + So we allocate enough more memory to hit a page boundary now, + which in turn causes future contiguous calls to page-align. + */ + + else { + front_misalign = 0; + end_misalign = 0; + correction = 0; + aligned_brk = brk; + + /* + If MORECORE returns an address lower than we have seen before, + we know it isn't really contiguous. This and some subsequent + checks help cope with non-conforming MORECORE functions and + the presence of "foreign" calls to MORECORE from outside of + malloc or by other threads. We cannot guarantee to detect + these in all cases, but cope with the ones we do detect. + */ + if (contiguous(av) && old_size != 0 && brk < old_end) { + set_noncontiguous(av); + } + + /* handle contiguous cases */ + if (contiguous(av)) { + + /* + We can tolerate forward non-contiguities here (usually due + to foreign calls) but treat them as part of our space for + stats reporting. + */ + if (old_size != 0) { + av->sbrked_mem += brk - old_end; + } + + /* Guarantee alignment of first new chunk made from this space */ + + front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; + if (front_misalign > 0) { + + /* + Skip over some bytes to arrive at an aligned position. + We don't need to specially mark these wasted front bytes. + They will never be accessed anyway because + prev_inuse of av->top (and any chunk created from its start) + is always true after initialization. + */ + + correction = MALLOC_ALIGNMENT - front_misalign; + aligned_brk += correction; + } + + /* + If this isn't adjacent to existing space, then we will not + be able to merge with old_top space, so must add to 2nd request. + */ + + correction += old_size; + + /* Extend the end address to hit a page boundary */ + end_misalign = (INTERNAL_SIZE_T)(brk + size + correction); + correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign; + + assert(correction >= 0); + snd_brk = (char *)(MORECORE(correction)); + + if (snd_brk == (char *)(MORECORE_FAILURE)) { + /* + If can't allocate correction, try to at least find out current + brk. It might be enough to proceed without failing. + */ + correction = 0; + snd_brk = (char *)(MORECORE(0)); + } else if (snd_brk < brk) { + /* + If the second call gives noncontiguous space even though + it says it won't, the only course of action is to ignore + results of second call, and conservatively estimate where + the first call left us. Also set noncontiguous, so this + won't happen again, leaving at most one hole. 
+ + Note that this check is intrinsically incomplete. Because + MORECORE is allowed to give more space than we ask for, + there is no reliable way to detect a noncontiguity + producing a forward gap for the second call. + */ + snd_brk = brk + size; + correction = 0; + set_noncontiguous(av); + } + } + + /* handle non-contiguous cases */ + else { + /* MORECORE/mmap must correctly align */ + assert(aligned_OK(chunk2mem(brk))); + + /* Find out current end of memory */ + if (snd_brk == (char *)(MORECORE_FAILURE)) { + snd_brk = (char *)(MORECORE(0)); + av->sbrked_mem += snd_brk - brk - size; + } + } + + /* Adjust top based on results of second sbrk */ + if (snd_brk != (char *)(MORECORE_FAILURE)) { + av->top = (mchunkptr)aligned_brk; + set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE); + av->sbrked_mem += correction; + + /* + If not the first time through, we either have a + gap due to foreign sbrk or a non-contiguous region. Insert a + double fencepost at old_top to prevent consolidation with space + we don't own. These fenceposts are artificial chunks that are + marked as inuse and are in any case too small to use. We need + two to make sizes and alignments work out. + */ + + if (old_size != 0) { + /* + Shrink old_top to insert fenceposts, keeping size a + multiple of MALLOC_ALIGNMENT. We know there is at least + enough space in old_top to do this. + */ + old_size = (old_size - 3 * SIZE_SZ) & ~MALLOC_ALIGN_MASK; + set_head(old_top, old_size | PREV_INUSE); + + /* + Note that the following assignments completely overwrite + old_top when old_size was previously MINSIZE. This is + intentional. We need the fencepost, even if old_top otherwise gets + lost. + */ + chunk_at_offset(old_top, old_size)->size = + SIZE_SZ | PREV_INUSE; + + chunk_at_offset(old_top, old_size + SIZE_SZ)->size = + SIZE_SZ | PREV_INUSE; + + /* + If possible, release the rest, suppressing trimming. + */ + if (old_size >= MINSIZE) { + INTERNAL_SIZE_T tt = av->trim_threshold; + av->trim_threshold = (INTERNAL_SIZE_T)(-1); + fREe(chunk2mem(old_top)); + av->trim_threshold = tt; + } + } + } + } + + /* Update statistics */ + sum = av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem)) { + av->max_sbrked_mem = sum; + } + + sum += av->mmapped_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { + av->max_total_mem = sum; + } + + check_malloc_state(); + + /* finally, do the allocation */ + + p = av->top; + size = chunksize(p); + + /* check that one of the above allocation paths succeeded */ + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + av->top = remainder; + set_head(p, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + check_malloced_chunk(p, nb); + return chunk2mem(p); + } + } + + /* catch all failure paths */ + MALLOC_FAILURE_ACTION; + return 0; +} + +/* + sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back + to the system (via negative arguments to sbrk) if there is unused + memory at the `high' end of the malloc pool. It is called + automatically by free() when top space exceeds the trim + threshold. It is also called by the public malloc_trim routine. It + returns 1 if it actually released any memory, else 0. 
+*/ + +#if __STD_C +static int sYSTRIm(size_t pad, mstate av) +#else +static int sYSTRIm(pad, av) +size_t pad; +mstate av; +#endif +{ + long top_size; /* Amount of top-most memory */ + long extra; /* Amount to release */ + long released; /* Amount actually released */ + char *current_brk; /* address returned by pre-check sbrk call */ + char *new_brk; /* address returned by post-check sbrk call */ + size_t pagesz; + + pagesz = av->pagesize; + top_size = chunksize(av->top); + + /* Release in pagesize units, keeping at least one page */ + extra = ((top_size - pad - MINSIZE + (pagesz - 1)) / pagesz - 1) * pagesz; + + if (extra > 0) { + + /* + Only proceed if end of memory is where we last set it. + This avoids problems if there were foreign sbrk calls. + */ + current_brk = (char *)(MORECORE(0)); + if (current_brk == (char *)(av->top) + top_size) { + + /* + Attempt to release memory. We ignore MORECORE return value, + and instead call again to find out where new end of memory is. + This avoids problems if first call releases less than we asked, + or if failure somehow altered brk value. (We could still + encounter problems if it altered brk in some very bad way, + but the only thing we can do is adjust anyway, which will cause + some downstream failure.) + */ + + MORECORE(-extra); + new_brk = (char *)(MORECORE(0)); + + if (new_brk != (char *)MORECORE_FAILURE) { + released = (long)(current_brk - new_brk); + + if (released != 0) { + /* Success. Adjust top. */ + av->sbrked_mem -= released; + set_head(av->top, (top_size - released) | PREV_INUSE); + check_malloc_state(); + return 1; + } + } + } + } + return 0; +} + +/* + ------------------------------ malloc ------------------------------ +*/ + +#if __STD_C +Void_t *mALLOc(size_t bytes) +#else +Void_t *mALLOc(bytes) +size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T nb; /* normalized request size */ + unsigned int idx; /* associated bin index */ + mbinptr bin; /* associated bin */ + mfastbinptr *fb; /* associated fastbin */ + + mchunkptr victim; /* inspected/selected chunk */ + INTERNAL_SIZE_T size; /* its size */ + int victim_index; /* its bin index */ + + mchunkptr remainder; /* remainder from a split */ + CHUNK_SIZE_T remainder_size; /* its size */ + + unsigned int block; /* bit map traverser */ + unsigned int bit; /* bit map traverser */ + unsigned int map; /* current word of binmap */ + + mchunkptr fwd; /* misc temp for linking */ + mchunkptr bck; /* misc temp for linking */ + + /* + Convert request size to internal form by adding SIZE_SZ bytes + overhead plus possibly more to obtain necessary alignment and/or + to obtain a size of at least MINSIZE, the smallest allocatable + size. Also, checked_request2size traps (returning 0) request sizes + that are so large that they wrap around zero when padded and + aligned. + */ + + checked_request2size(bytes, nb); + + /* + Bypass search if no frees yet + */ + if (!have_anychunks(av)) { + if (av->max_fast == 0) { /* initialization check */ + malloc_consolidate(av); + } + goto use_top; + } + + /* + If the size qualifies as a fastbin, first check corresponding bin. + */ + + if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) { + fb = &(av->fastbins[(fastbin_index(nb))]); + if ((victim = *fb) != 0) { + *fb = victim->fd; + check_remalloced_chunk(victim, nb); + return chunk2mem(victim); + } + }
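/*
  (Aside: a minimal sketch of the normalization checked_request2size
  performs, as described above. The constants mirror a typical 32-bit
  configuration, SIZE_SZ 4, 8-byte alignment, MINSIZE 16, but are local to
  the sketch, not definitions from this file.

      #include <stdio.h>

      #define TOY_SIZE_SZ    4u
      #define TOY_ALIGN_MASK 7u
      #define TOY_MINSIZE    16u

      // returns 0 for requests so large that padding would wrap around zero
      static unsigned toy_request2size(unsigned req) {
          if (req + TOY_SIZE_SZ + TOY_ALIGN_MASK < req) {
              return 0;                                    // overflow trap
          }
          if (req + TOY_SIZE_SZ + TOY_ALIGN_MASK < TOY_MINSIZE) {
              return TOY_MINSIZE;
          }
          return (req + TOY_SIZE_SZ + TOY_ALIGN_MASK) & ~TOY_ALIGN_MASK;
      }

      int main(void) {
          printf("%u\n", toy_request2size(1));   // 16: minimum chunk size
          printf("%u\n", toy_request2size(13));  // 24: 13 + 4 overhead, rounded
          printf("%u\n", toy_request2size(~0u)); // 0: rejected, would wrap
          return 0;
      }
  )
*/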
+ + /* + If a small request, check regular bin. Since these "smallbins" + hold one size each, no searching within bins is necessary. + (For a large request, we need to wait until unsorted chunks are + processed to find best fit. But for small ones, fits are exact + anyway, so we can check now, which is faster.) + */ + + if (in_smallbin_range(nb)) { + idx = smallbin_index(nb); + bin = bin_at(av, idx); + + if ((victim = last(bin)) != bin) { + bck = victim->bk; + set_inuse_bit_at_offset(victim, nb); + bin->bk = bck; + bck->fd = bin; + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + + /* + If this is a large request, consolidate fastbins before continuing. + While it might look excessive to kill all fastbins before + even seeing if there is space available, this avoids + fragmentation problems normally associated with fastbins. + Also, in practice, programs tend to have runs of either small or + large requests, but less often mixtures, so consolidation is not + invoked all that often in most programs. And the programs that + it is called frequently in otherwise tend to fragment. + */ + + else { + idx = largebin_index(nb); + if (have_fastchunks(av)) { + malloc_consolidate(av); + } + } + + /* + Process recently freed or remaindered chunks, taking one only if + it is exact fit, or, if this is a small request, the chunk is remainder from + the most recent non-exact fit. Place other traversed chunks in + bins. Note that this step is the only place in any routine where + chunks are placed in bins. + */ + + while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { + bck = victim->bk; + size = chunksize(victim); + + /* + If a small request, try to use last remainder if it is the + only chunk in unsorted bin. This helps promote locality for + runs of consecutive small requests. This is the only + exception to best-fit, and applies only when there is + no exact fit for a small chunk.
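   (Aside: the payoff is address locality. A toy trace of three 24-byte
   requests carved from one 4096-byte remainder, illustrative numbers only:

       #include <stdio.h>

       int main(void) {
           unsigned base = 0, size = 4096, nb = 24;
           for (int i = 0; i < 3; i++) {
               printf("request %d served at offset %u\n", i, base);
               base += nb;   // victim split off the front of the remainder
               size -= nb;   // what is left stays in the unsorted bin
           }
           printf("remainder left: %u bytes\n", size);  // 4024
           return 0;
       }

   Consecutive small requests thus come back at consecutive addresses.)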
+ */ + + if (in_smallbin_range(nb) && + bck == unsorted_chunks(av) && + victim == av->last_remainder && + (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { + + /* split and reattach remainder */ + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + av->last_remainder = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* remove from unsorted list */ + unsorted_chunks(av)->bk = bck; + bck->fd = unsorted_chunks(av); + + /* Take now instead of binning if exact fit */ + + if (size == nb) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* place chunk in bin */ + + if (in_smallbin_range(size)) { + victim_index = smallbin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + } else { + victim_index = largebin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + + if (fwd != bck) { + /* if smaller than smallest, place first */ + if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) { + fwd = bck; + bck = bck->bk; + } else if ((CHUNK_SIZE_T)(size) >= + (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { + + /* maintain large bins in sorted order */ + size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ + while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) { + fwd = fwd->fd; + } + bck = fwd->bk; + } + } + } + + mark_bin(av, victim_index); + victim->bk = bck; + victim->fd = fwd; + fwd->bk = victim; + bck->fd = victim; + } + + /* + If a large request, scan through the chunks of current bin to + find one that fits. (This will be the smallest that fits unless + FIRST_SORTED_BIN_SIZE has been changed from default.) This is + the only step where an unbounded number of chunks might be + scanned without doing anything useful with them. However the + lists tend to be short. + */ + + if (!in_smallbin_range(nb)) { + bin = bin_at(av, idx); + + for (victim = last(bin); victim != bin; victim = victim->bk) { + size = chunksize(victim); + + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) { + remainder_size = size - nb; + unlink(victim, bck, fwd); + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + } + } + + /* + Search for a chunk by scanning bins, starting with next largest + bin. This search is strictly by best-fit; i.e., the smallest + (with ties going to approximately the least recently used) chunk + that fits is selected. + + The bitmap avoids needing to check that most blocks are nonempty. + */ + + ++idx; + bin = bin_at(av, idx); + block = idx2block(idx); + map = av->binmap[block]; + bit = idx2bit(idx); + + for (;;) { + + /* Skip rest of block if there are no more set bits in this block. 
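      (Aside: the binmap arithmetic in play here, as a toy check; with
      BINMAPSHIFT 5, bin i lives in word i >> 5 at bit i & 31, so for
      example bin 70 sits in block 2 at bit 6:

          #include <stdio.h>

          int main(void) {
              unsigned binmap[3] = {0};           // 96 bins / 32 bits per word
              int i = 70;
              binmap[i >> 5] |= 1u << (i & 31);   // mark_bin
              printf("block %d, set %u\n", i >> 5,
                     (binmap[i >> 5] >> (i & 31)) & 1u);  // block 2, set 1
              return 0;
          }

      The idx2block/idx2bit macros above are exactly this computation.)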
*/ + if (bit > map || bit == 0) { + do { + if (++block >= BINMAPSIZE) { /* out of bins */ + goto use_top; + } + } while ((map = av->binmap[block]) == 0); + + bin = bin_at(av, (block << BINMAPSHIFT)); + bit = 1; + } + + /* Advance to bin with set bit. There must be one. */ + while ((bit & map) == 0) { + bin = next_bin(bin); + bit <<= 1; + assert(bit != 0); + } + + /* Inspect the bin. It is likely to be non-empty */ + victim = last(bin); + + /* If a false alarm (empty bin), clear the bit. */ + if (victim == bin) { + av->binmap[block] = map &= ~bit; /* Write through */ + bin = next_bin(bin); + bit <<= 1; + } + + else { + size = chunksize(victim); + + /* We know the first chunk in this bin is big enough to use. */ + assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)); + + remainder_size = size - nb; + + /* unlink */ + bck = victim->bk; + bin->bk = bck; + bck->fd = bin; + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + /* advertise as last remainder */ + if (in_smallbin_range(nb)) { + av->last_remainder = remainder; + } + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + } + +use_top: + /* + If large enough, split off the chunk bordering the end of memory + (held in av->top). Note that this is in accord with the best-fit + search rule. In effect, av->top is treated as larger (and thus + less well fitting) than any other available chunk since it can + be extended to be as large as necessary (up to system + limitations). + + We require that av->top always exists (i.e., has size >= + MINSIZE) after initialization, so if it would otherwise be + exhausted by current request, it is replenished. (The main + reason for ensuring it exists is that we may need MINSIZE space + to put in fenceposts in sysmalloc.) + */ + + victim = av->top; + size = chunksize(victim); + + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + av->top = remainder; + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* + If no space in top, relay to handle system-dependent cases + */ + return sYSMALLOc(nb, av); +} + +/* + ------------------------------ free ------------------------------ +*/ + +#if __STD_C +void fREe(Void_t *mem) +#else +void fREe(mem) Void_t *mem; +#endif +{ + mstate av = get_malloc_state(); + + mchunkptr p; /* chunk corresponding to mem */ + INTERNAL_SIZE_T size; /* its size */ + mfastbinptr *fb; /* associated fastbin */ + mchunkptr nextchunk; /* next contiguous chunk */ + INTERNAL_SIZE_T nextsize; /* its size */ + int nextinuse; /* true if nextchunk is used */ + INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + /* free(0) has no effect */ + if (mem != 0) { + p = mem2chunk(mem); + size = chunksize(p); + + check_inuse_chunk(p); + + /* + If eligible, place chunk on a fastbin so it can be found + and used quickly in malloc.
+ */ + + if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast) + +#if TRIM_FASTBINS + /* + If TRIM_FASTBINS set, don't place chunks + bordering top into fastbins + */ + && (chunk_at_offset(p, size) != av->top) +#endif + ) { + + set_fastchunks(av); + fb = &(av->fastbins[fastbin_index(size)]); + p->fd = *fb; + *fb = p; + } + + /* + Consolidate other non-mmapped chunks as they arrive. + */ + + else if (!chunk_is_mmapped(p)) { + set_anychunks(av); + + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + /* consolidate backward */ + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long)prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + /* get and clear inuse bit */ + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + set_head(nextchunk, nextsize); + + /* consolidate forward */ + if (!nextinuse) { + unlink(nextchunk, bck, fwd); + size += nextsize; + } + + /* + Place the chunk in unsorted chunk list. Chunks are + not placed into regular bins until after they have + been given one chance to be used in malloc. + */ + + bck = unsorted_chunks(av); + fwd = bck->fd; + p->bk = bck; + p->fd = fwd; + bck->fd = p; + fwd->bk = p; + + set_head(p, size | PREV_INUSE); + set_foot(p, size); + + check_free_chunk(p); + } + + /* + If the chunk borders the current high end of memory, + consolidate into top + */ + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + check_chunk(p); + } + + /* + If freeing a large space, consolidate possibly-surrounding + chunks. Then, if the total unused topmost memory exceeds trim + threshold, ask malloc_trim to reduce top. + + Unless max_fast is 0, we don't know if there are fastbins + bordering top, so we cannot tell for sure whether threshold + has been reached unless fastbins are consolidated. But we + don't want to consolidate on each free. As a compromise, + consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD + is reached. + */ + + if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { + if (have_fastchunks(av)) { + malloc_consolidate(av); + } + +#ifndef MORECORE_CANNOT_TRIM + if ((CHUNK_SIZE_T)(chunksize(av->top)) >= + (CHUNK_SIZE_T)(av->trim_threshold)) { + sYSTRIm(av->top_pad, av); + } +#endif + } + } + /* + If the chunk was allocated via mmap, release via munmap() + Note that if HAVE_MMAP is false but chunk_is_mmapped is + true, then user must have overwritten memory. There's nothing + we can do to catch this error unless DL_DEBUG is set, in which case + check_inuse_chunk (above) will have triggered error. + */ + + else { +#if HAVE_MMAP + INTERNAL_SIZE_T offset = p->prev_size; + av->n_mmaps--; + av->mmapped_mem -= (size + offset); + munmap((char *)p - offset, size + offset); +#endif + } + } +} + +/* + ------------------------- malloc_consolidate ------------------------- + + malloc_consolidate is a specialized version of free() that tears + down chunks held in fastbins. Free itself cannot be used for this + purpose since, among other things, it might place chunks back onto + fastbins. So, instead, we need to use a minor variant of the same + code. + + Also, because this routine needs to be called the first time through + malloc anyway, it turns out to be the perfect place to trigger + initialization code. 
+*/ + +#if __STD_C +static void malloc_consolidate(mstate av) +#else +static void malloc_consolidate(av) mstate av; +#endif +{ + mfastbinptr *fb; /* current fastbin being consolidated */ + mfastbinptr *maxfb; /* last fastbin (for loop control) */ + mchunkptr p; /* current chunk being consolidated */ + mchunkptr nextp; /* next chunk to consolidate */ + mchunkptr unsorted_bin; /* bin header */ + mchunkptr first_unsorted; /* chunk to link to */ + + /* These have same use as in free() */ + mchunkptr nextchunk; + INTERNAL_SIZE_T size; + INTERNAL_SIZE_T nextsize; + INTERNAL_SIZE_T prevsize; + int nextinuse; + mchunkptr bck; + mchunkptr fwd; + + /* + If max_fast is 0, we know that av hasn't + yet been initialized, in which case do so below + */ + + if (av->max_fast != 0) { + clear_fastchunks(av); + + unsorted_bin = unsorted_chunks(av); + + /* + Remove each chunk from fast bin and consolidate it, placing it + then in unsorted bin. Among other reasons for doing this, + placing in unsorted bin avoids needing to calculate actual bins + until malloc is sure that chunks aren't immediately going to be + reused anyway. + */ + + maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); + fb = &(av->fastbins[0]); + do { + if ((p = *fb) != 0) { + *fb = 0; + + do { + check_inuse_chunk(p); + nextp = p->fd; + + /* Slightly streamlined version of consolidation code in free() */ + size = p->size & ~PREV_INUSE; + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long)prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + set_head(nextchunk, nextsize); + + if (!nextinuse) { + size += nextsize; + unlink(nextchunk, bck, fwd); + } + + first_unsorted = unsorted_bin->fd; + unsorted_bin->fd = p; + first_unsorted->bk = p; + + set_head(p, size | PREV_INUSE); + p->bk = unsorted_bin; + p->fd = first_unsorted; + set_foot(p, size); + } + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + } + + } while ((p = nextp) != 0); + } + } while (fb++ != maxfb); + } else { + malloc_init_state(av); + check_malloc_state(); + } +} + +/* + ------------------------------ realloc ------------------------------ +*/ + +#if __STD_C +Void_t *rEALLOc(Void_t *oldmem, size_t bytes) +#else +Void_t *rEALLOc(oldmem, bytes) +Void_t *oldmem; +size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T nb; /* padded request size */ + + mchunkptr oldp; /* chunk corresponding to oldmem */ + INTERNAL_SIZE_T oldsize; /* its size */ + + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + Void_t *newmem; /* corresponding user mem */ + + mchunkptr next; /* next contiguous chunk after oldp */ + + mchunkptr remainder; /* extra space at end of newp */ + CHUNK_SIZE_T remainder_size; /* its size */ + + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + CHUNK_SIZE_T copysize; /* bytes to copy */ + unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ + INTERNAL_SIZE_T *s; /* copy source */ + INTERNAL_SIZE_T *d; /* copy destination */ + +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + fREe(oldmem); + return 0; + } +#endif + + /* realloc of null is supposed to be same as malloc */ + if (oldmem == 0) { + return mALLOc(bytes); + } + + checked_request2size(bytes, nb); + + oldp = mem2chunk(oldmem); + oldsize = chunksize(oldp); + + check_inuse_chunk(oldp); + 
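/*
  (Editor's aside, summarizing the non-mmapped path that follows; this
  pseudo-code is a compressed sketch of the branches below, not additional
  logic:

      if oldsize >= nb              ->  reuse oldp; any tail is split off below
      else if next is top and fits  ->  absorb top, advance av->top, return
      else if next is free and fits ->  unlink(next), absorb it in place
      else                          ->  malloc new, copy, free old

  Only the final branch may move the data, and even it skips the copy when
  the newly malloced chunk happens to directly follow the old one.)
*/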
+ if (!chunk_is_mmapped(oldp)) { + + if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) { + /* already big enough; split below */ + newp = oldp; + newsize = oldsize; + } + + else { + next = chunk_at_offset(oldp, oldsize); + + /* Try to expand forward into top */ + if (next == av->top && + (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= + (CHUNK_SIZE_T)(nb + MINSIZE)) { + set_head_size(oldp, nb); + av->top = chunk_at_offset(oldp, nb); + set_head(av->top, (newsize - nb) | PREV_INUSE); + return chunk2mem(oldp); + } + + /* Try to expand forward into next chunk; split off remainder below */ + else if (next != av->top && + !inuse(next) && + (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= + (CHUNK_SIZE_T)(nb)) { + newp = oldp; + unlink(next, bck, fwd); + } + + /* allocate, copy, free */ + else { + newmem = mALLOc(nb - MALLOC_ALIGN_MASK); + if (newmem == 0) { + return 0; /* propagate failure */ + } + + newp = mem2chunk(newmem); + newsize = chunksize(newp); + + /* + Avoid copy if newp is next chunk after oldp. + */ + if (newp == next) { + newsize += oldsize; + newp = oldp; + } else { + /* + Unroll copy of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + copysize = oldsize - SIZE_SZ; + s = (INTERNAL_SIZE_T *)(oldmem); + d = (INTERNAL_SIZE_T *)(newmem); + ncopies = copysize / sizeof(INTERNAL_SIZE_T); + assert(ncopies >= 3); + + if (ncopies > 9) { + MALLOC_COPY(d, s, copysize); + } + + else { + *(d + 0) = *(s + 0); + *(d + 1) = *(s + 1); + *(d + 2) = *(s + 2); + if (ncopies > 4) { + *(d + 3) = *(s + 3); + *(d + 4) = *(s + 4); + if (ncopies > 6) { + *(d + 5) = *(s + 5); + *(d + 6) = *(s + 6); + if (ncopies > 8) { + *(d + 7) = *(s + 7); + *(d + 8) = *(s + 8); + } + } + } + } + + fREe(oldmem); + check_inuse_chunk(newp); + return chunk2mem(newp); + } + } + } + + /* If possible, free extra space in old or extended chunk */ + + assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb)); + + remainder_size = newsize - nb; + + if (remainder_size < MINSIZE) { /* not enough extra to split off */ + set_head_size(newp, newsize); + set_inuse_bit_at_offset(newp, newsize); + } else { /* split remainder */ + remainder = chunk_at_offset(newp, nb); + set_head_size(newp, nb); + set_head(remainder, remainder_size | PREV_INUSE); + /* Mark remainder as inuse so free() won't complain */ + set_inuse_bit_at_offset(remainder, remainder_size); + fREe(chunk2mem(remainder)); + } + + check_inuse_chunk(newp); + return chunk2mem(newp); + } + + /* + Handle mmap cases + */ + + else { +#if HAVE_MMAP + +#if HAVE_MREMAP + INTERNAL_SIZE_T offset = oldp->prev_size; + size_t pagemask = av->pagesize - 1; + char *cp; + CHUNK_SIZE_T sum; + + /* Note the extra SIZE_SZ overhead */ + newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; + + /* don't need to remap if still within same page */ + if (oldsize == newsize - offset) { + return oldmem; + } + + cp = (char *)mremap((char *)oldp - offset, oldsize + offset, newsize, 1); + + if (cp != (char *)MORECORE_FAILURE) { + + newp = (mchunkptr)(cp + offset); + set_head(newp, (newsize - offset) | IS_MMAPPED); + + assert(aligned_OK(chunk2mem(newp))); + assert((newp->prev_size == offset)); + + /* update statistics */ + sum = av->mmapped_mem += newsize - oldsize; + if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) { + av->max_mmapped_mem = sum; + } + sum += av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { + av->max_total_mem = sum; + } + + return chunk2mem(newp); + } +#endif + + /* Note the extra 
SIZE_SZ overhead. */ + if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) { + newmem = oldmem; /* do nothing */ + } else { + /* Must alloc, copy, free. */ + newmem = mALLOc(nb - MALLOC_ALIGN_MASK); + if (newmem != 0) { + MALLOC_COPY(newmem, oldmem, oldsize - 2 * SIZE_SZ); + fREe(oldmem); + } + } + return newmem; + +#else + /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ + check_malloc_state(); + MALLOC_FAILURE_ACTION; + return 0; +#endif + } +} + +/* + ------------------------------ memalign ------------------------------ +*/ + +#if __STD_C +Void_t *mEMALIGn(size_t alignment, size_t bytes) +#else +Void_t *mEMALIGn(alignment, bytes) +size_t alignment; +size_t bytes; +#endif +{ + INTERNAL_SIZE_T nb; /* padded request size */ + char *m; /* memory returned by malloc call */ + mchunkptr p; /* corresponding chunk */ + char *brk; /* alignment point within p */ + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ + mchunkptr remainder; /* spare room at end to split off */ + CHUNK_SIZE_T remainder_size; /* its size */ + INTERNAL_SIZE_T size; + + /* If need less alignment than we give anyway, just relay to malloc */ + + if (alignment <= MALLOC_ALIGNMENT) { + return mALLOc(bytes); + } + + /* Otherwise, ensure that it is at least a minimum chunk size */ + + if (alignment < MINSIZE) { + alignment = MINSIZE; + } + + /* Make sure alignment is power of 2 (in case MINSIZE is not). */ + if ((alignment & (alignment - 1)) != 0) { + size_t a = MALLOC_ALIGNMENT * 2; + while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) { + a <<= 1; + } + alignment = a; + } + + checked_request2size(bytes, nb); + + /* + Strategy: find a spot within that chunk that meets the alignment + request, and then possibly free the leading and trailing space. + */ + + /* Call malloc with worst case padding to hit alignment. */ + + m = (char *)(mALLOc(nb + alignment + MINSIZE)); + + if (m == 0) { + return 0; /* propagate failure */ + } + + p = mem2chunk(m); + + if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */ + + /* + Find an aligned spot inside chunk. Since we need to give back + leading space in a chunk of at least MINSIZE, if the first + calculation places us at a spot with less than MINSIZE leader, + we can move to the next aligned spot -- we've allocated enough + total room so that this is always possible. 
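   (Aside: the aligned spot is found with the usual two's-complement
   round-up. A self-contained toy check, assuming alignment is a power of
   two and using illustrative values only:

       #include <stdio.h>
       #include <stdint.h>

       int main(void) {
           uintptr_t m = 1003, alignment = 64;
           uintptr_t up = (m + alignment - 1) & -(intptr_t)alignment;
           printf("%lu\n", (unsigned long)up);  // 1024
           return 0;
       }

   This is the same computation the mem2chunk(...) expression below applies
   to the malloced pointer.)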
+ */ + + brk = (char *)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) & + -((signed long)alignment))); + if ((CHUNK_SIZE_T)(brk - (char *)(p)) < MINSIZE) { + brk += alignment; + } + + newp = (mchunkptr)brk; + leadsize = brk - (char *)(p); + newsize = chunksize(p) - leadsize; + + /* For mmapped chunks, just adjust offset */ + if (chunk_is_mmapped(p)) { + newp->prev_size = p->prev_size + leadsize; + set_head(newp, newsize | IS_MMAPPED); + return chunk2mem(newp); + } + + /* Otherwise, give back leader, use the rest */ + set_head(newp, newsize | PREV_INUSE); + set_inuse_bit_at_offset(newp, newsize); + set_head_size(p, leadsize); + fREe(chunk2mem(p)); + p = newp; + + assert(newsize >= nb && + (((PTR_UINT)(chunk2mem(p))) % alignment) == 0); + } + + /* Also give back spare room at the end */ + if (!chunk_is_mmapped(p)) { + size = chunksize(p); + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + set_head(remainder, remainder_size | PREV_INUSE); + set_head_size(p, nb); + fREe(chunk2mem(remainder)); + } + } + + check_inuse_chunk(p); + return chunk2mem(p); +} + +/* + ------------------------------ calloc ------------------------------ +*/ + +#if __STD_C +Void_t *cALLOc(size_t n_elements, size_t elem_size) +#else +Void_t *cALLOc(n_elements, elem_size) +size_t n_elements; +size_t elem_size; +#endif +{ + mchunkptr p; + CHUNK_SIZE_T clearsize; + CHUNK_SIZE_T nclears; + INTERNAL_SIZE_T *d; + + Void_t *mem = mALLOc(n_elements * elem_size); + + if (mem != 0) { + p = mem2chunk(mem); + + if (!chunk_is_mmapped(p)) { + /* + Unroll clear of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + d = (INTERNAL_SIZE_T *)mem; + clearsize = chunksize(p) - SIZE_SZ; + nclears = clearsize / sizeof(INTERNAL_SIZE_T); + assert(nclears >= 3); + + if (nclears > 9) { + MALLOC_ZERO(d, clearsize); + } + + else { + *(d + 0) = 0; + *(d + 1) = 0; + *(d + 2) = 0; + if (nclears > 4) { + *(d + 3) = 0; + *(d + 4) = 0; + if (nclears > 6) { + *(d + 5) = 0; + *(d + 6) = 0; + if (nclears > 8) { + *(d + 7) = 0; + *(d + 8) = 0; + } + } + } + } + } +#if !MMAP_CLEARS + else { + d = (INTERNAL_SIZE_T *)mem; + /* + Note the additional SIZE_SZ + */ + clearsize = chunksize(p) - 2 * SIZE_SZ; + MALLOC_ZERO(d, clearsize); + } +#endif + } + return mem; +} + +/* + ------------------------------ cfree ------------------------------ +*/ + +#if __STD_C +void cFREe(Void_t *mem) +#else +void cFREe(mem) Void_t *mem; +#endif +{ + fREe(mem); +} + +/* + ------------------------- independent_calloc ------------------------- +*/ + +#if __STD_C +Void_t **iCALLOc(size_t n_elements, size_t elem_size, Void_t *chunks[]) +#else +Void_t **iCALLOc(n_elements, elem_size, chunks) +size_t n_elements; +size_t elem_size; +Void_t *chunks[]; +#endif +{ + size_t sz = elem_size; /* serves as 1-element array */ + /* opts arg of 3 means all elements are same size, and should be cleared */ + return iALLOc(n_elements, &sz, 3, chunks); +} + +/* + ------------------------- independent_comalloc ------------------------- +*/ + +#if __STD_C +Void_t **iCOMALLOc(size_t n_elements, size_t sizes[], Void_t *chunks[]) +#else +Void_t **iCOMALLOc(n_elements, sizes, chunks) +size_t n_elements; +size_t sizes[]; +Void_t *chunks[]; +#endif +{ + return iALLOc(n_elements, sizes, 0, chunks); +} + +/* + ------------------------------ ialloc ------------------------------ + ialloc provides common support for independent_X routines, handling all of 
+ the combinations that can result. + + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ + +#if __STD_C +static Void_t **iALLOc(size_t n_elements, + size_t *sizes, + int opts, + Void_t *chunks[]) +#else +static Void_t **iALLOc(n_elements, sizes, opts, chunks) +size_t n_elements; +size_t *sizes; +int opts; +Void_t *chunks[]; +#endif +{ + mstate av = get_malloc_state(); + INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ + INTERNAL_SIZE_T contents_size; /* total size of elements */ + INTERNAL_SIZE_T array_size; /* request size of pointer array */ + Void_t *mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ + Void_t **marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + int mmx; /* to disable mmap */ + INTERNAL_SIZE_T size; + size_t i; + + /* Ensure initialization */ + if (av->max_fast == 0) { + malloc_consolidate(av); + } + + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) { + return chunks; /* nothing to do */ + } + marray = chunks; + array_size = 0; + } else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) { + return (Void_t **)mALLOc(0); + } + marray = 0; + array_size = request2size(n_elements * (sizeof(Void_t *))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) { + contents_size += request2size(sizes[i]); + } + } + + /* subtract out alignment bytes from total to minimize overallocation */ + size = contents_size + array_size - MALLOC_ALIGN_MASK; + + /* + Allocate the aggregate chunk. + But first disable mmap so malloc won't use it, since + we would not be able to later free/realloc space internal + to a segregated mmap region. 
+ */ + mmx = av->n_mmaps_max; /* disable mmap */ + av->n_mmaps_max = 0; + mem = mALLOc(size); + av->n_mmaps_max = mmx; /* reset mmap */ + if (mem == 0) { + return 0; + } + + p = mem2chunk(mem); + assert(!chunk_is_mmapped(p)); + remainder_size = chunksize(p); + + if (opts & 0x2) { /* optionally clear the elements */ + MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + array_chunk = chunk_at_offset(p, contents_size); + marray = (Void_t **)(chunk2mem(array_chunk)); + set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0;; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements - 1) { + if (element_size != 0) { + size = element_size; + } else { + size = request2size(sizes[i]); + } + remainder_size -= size; + set_head(p, size | PREV_INUSE); + p = chunk_at_offset(p, size); + } else { /* the final element absorbs any overallocation slop */ + set_head(p, remainder_size | PREV_INUSE); + break; + } + } + +#if DL_DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(mem2chunk(marray)); + } + + for (i = 0; i != n_elements; ++i) { + check_inuse_chunk(mem2chunk(marray[i])); + } +#endif + + return marray; +} + +/* + ------------------------------ valloc ------------------------------ +*/ + +#if __STD_C +Void_t *vALLOc(size_t bytes) +#else +Void_t *vALLOc(bytes) +size_t bytes; +#endif +{ + /* Ensure initialization */ + mstate av = get_malloc_state(); + if (av->max_fast == 0) { + malloc_consolidate(av); + } + return mEMALIGn(av->pagesize, bytes); +} + +/* + ------------------------------ pvalloc ------------------------------ +*/ + +#if __STD_C +Void_t *pVALLOc(size_t bytes) +#else +Void_t *pVALLOc(bytes) +size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + size_t pagesz; + + /* Ensure initialization */ + if (av->max_fast == 0) { + malloc_consolidate(av); + } + pagesz = av->pagesize; + return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); +} + +/* + ------------------------------ malloc_trim ------------------------------ +*/ + +#if __STD_C +int mTRIm(size_t pad) +#else +int mTRIm(pad) +size_t pad; +#endif +{ + mstate av = get_malloc_state(); + /* Ensure initialization/consolidation */ + malloc_consolidate(av); + +#ifndef MORECORE_CANNOT_TRIM + return sYSTRIm(pad, av); +#else + return 0; +#endif +} + +/* + ------------------------- malloc_usable_size ------------------------- +*/ + +#if __STD_C +size_t mUSABLe(Void_t *mem) +#else +size_t mUSABLe(mem) +Void_t *mem; +#endif +{ + mchunkptr p; + if (mem != 0) { + p = mem2chunk(mem); + if (chunk_is_mmapped(p)) { + return chunksize(p) - 2 * SIZE_SZ; + } else if (inuse(p)) { + return chunksize(p) - SIZE_SZ; + } + } + return 0; +} + +/* + ------------------------------ mallinfo ------------------------------ +*/ + +struct mallinfo mALLINFo() { + mstate av = get_malloc_state(); + struct mallinfo mi; + int i; + mbinptr b; + mchunkptr p; + INTERNAL_SIZE_T avail; + INTERNAL_SIZE_T fastavail; + int nblocks; + int nfastblocks; + + /* Ensure initialization */ + if (av->top == 0) { + malloc_consolidate(av); + } + + check_malloc_state(); + + /* Account for top */ + avail = chunksize(av->top); + nblocks = 1; /* top always exists */ + + /* traverse fastbins */ + 
nfastblocks = 0; + fastavail = 0; + + for (i = 0; NFASTBINS - i > 0; ++i) { + for (p = av->fastbins[i]; p != 0; p = p->fd) { + ++nfastblocks; + fastavail += chunksize(p); + } + } + + avail += fastavail; + + /* traverse regular bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av, i); + for (p = last(b); p != b; p = p->bk) { + ++nblocks; + avail += chunksize(p); + } + } + + mi.smblks = nfastblocks; + mi.ordblks = nblocks; + mi.fordblks = avail; + mi.uordblks = av->sbrked_mem - avail; + mi.arena = av->sbrked_mem; + mi.hblks = av->n_mmaps; + mi.hblkhd = av->mmapped_mem; + mi.fsmblks = fastavail; + mi.keepcost = chunksize(av->top); + mi.usmblks = av->max_total_mem; + return mi; +} + +/* + ------------------------------ malloc_stats ------------------------------ +*/ + +void mSTATs(void) { + struct mallinfo mi = mALLINFo(); + +#ifdef WIN32 + { + CHUNK_SIZE_T free, reserved, committed; + vminfo(&free, &reserved, &committed); + fprintf(stderr, "free bytes = %10lu\n", + free); + fprintf(stderr, "reserved bytes = %10lu\n", + reserved); + fprintf(stderr, "committed bytes = %10lu\n", + committed); + } +#endif + + fprintf(stderr, "max system bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.usmblks)); + fprintf(stderr, "system bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.arena + mi.hblkhd)); + fprintf(stderr, "in use bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd)); + +#ifdef WIN32 + { + CHUNK_SIZE_T kernel, user; + if (cpuinfo(TRUE, &kernel, &user)) { + fprintf(stderr, "kernel ms = %10lu\n", + kernel); + fprintf(stderr, "user ms = %10lu\n", + user); + } + } +#endif +} + +/* + ------------------------------ mallopt ------------------------------ +*/ + +#if __STD_C +int mALLOPt(int param_number, int value) +#else +int mALLOPt(param_number, value) +int param_number; +int value; +#endif +{ + mstate av = get_malloc_state(); + /* Ensure initialization/consolidation */ + malloc_consolidate(av); + + switch (param_number) { + case M_MXFAST: + if (value >= 0 && value <= MAX_FAST_SIZE) { + set_max_fast(av, value); + return 1; + } else { + return 0; + } + + case M_TRIM_THRESHOLD: + av->trim_threshold = value; + return 1; + + case M_TOP_PAD: + av->top_pad = value; + return 1; + + case M_MMAP_THRESHOLD: + av->mmap_threshold = value; + return 1; + + case M_MMAP_MAX: +#if !HAVE_MMAP + if (value != 0) { + return 0; + } +#endif + av->n_mmaps_max = value; + return 1; + + default: + return 0; + } +} + +/* + -------------------- Alternative MORECORE functions -------------------- +*/ + +/* + General Requirements for MORECORE. + + The MORECORE function must have the following properties: + + If MORECORE_CONTIGUOUS is false: + + * MORECORE must allocate in multiples of pagesize. It will + only be called with arguments that are multiples of pagesize. + + * MORECORE(0) must return an address that is at least + MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) + + else (i.e. If MORECORE_CONTIGUOUS is true): + + * Consecutive calls to MORECORE with positive arguments + return increasing addresses, indicating that space has been + contiguously extended. + + * MORECORE need not allocate in multiples of pagesize. + Calls to MORECORE need not have args of multiples of pagesize. + + * MORECORE need not page-align. + + In either case: + + * MORECORE may allocate more memory than requested. (Or even less, + but this will generally result in a malloc failure.) + + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. 
This malloc does NOT call MORECORE(0)
+  until at least one call with positive arguments is made, so
+  the initial value returned is not important.
+
+  * Even though consecutive calls to MORECORE need not return contiguous
+  addresses, it must be OK for malloc'ed chunks to span multiple
+  regions in those cases where they do happen to be contiguous.
+
+  * MORECORE need not handle negative arguments -- it may instead
+  just return MORECORE_FAILURE when given negative arguments.
+  Negative arguments are always multiples of pagesize. MORECORE
+  must not misinterpret negative args as large positive unsigned
+  args. You can suppress all such calls from even occurring by defining
+  MORECORE_CANNOT_TRIM.
+
+  There is some variation across systems about the type of the
+  argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+  actually be size_t, because sbrk supports negative args, so it is
+  normally the signed type of the same width as size_t (sometimes
+  declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much
+  matter though. Internally, we use "long" as arguments, which should
+  work across all reasonable possibilities.
+
+  Additionally, if MORECORE ever returns failure for a positive
+  request, and HAVE_MMAP is true, then mmap is used as a noncontiguous
+  system allocator. This is a useful backup strategy for systems with
+  holes in address spaces -- in this case sbrk cannot contiguously
+  expand the heap, but mmap may be able to map noncontiguous space.
+
+  If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+  a function that always returns MORECORE_FAILURE.
+
+  Malloc only has limited ability to detect failures of MORECORE
+  to supply contiguous space when it says it can. In particular,
+  multithreaded programs that do not use locks may result in
+  race conditions across calls to MORECORE that result in gaps
+  that cannot be detected as such, and subsequent corruption.
+
+  If you are using this malloc with something other than sbrk (or its
+  emulation) to supply memory regions, you probably want to set
+  MORECORE_CONTIGUOUS as false. As an example, here is a custom
+  allocator kindly contributed for pre-OSX macOS. It uses virtually
+  but not necessarily physically contiguous non-paged memory (locked
+  in, present and won't get swapped out). You can use it by
+  uncommenting this section, adding some #includes, and setting up the
+  appropriate defines above:
+
+  #define MORECORE osMoreCore
+  #define MORECORE_CONTIGUOUS 0
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
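+
+  (A minimal sketch of the "always fail" MORECORE mentioned above; the
+  name failMoreCore is illustrative only, not part of this malloc. With
+  HAVE_MMAP true, this routes every request through mmap:
+
+    static void *failMoreCore(long size)
+    {
+      (void) size;
+      return (void *) MORECORE_FAILURE;
+    }
+
+    #define MORECORE failMoreCore
+    #define MORECORE_CONTIGUOUS 0
+
+  The osMoreCore allocator follows.)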
+ + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MORECORE_FAILURE; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MORECORE_FAILURE; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + +/* + -------------------------------------------------------------- + + Emulation of sbrk for win32. + Donated by J. Walter . + For additional information about this code, and malloc on Win32, see + http://www.genesys-e.de/jwalter/ +*/ + +#ifdef WIN32 + +#ifdef _DEBUG +/* #define TRACE */ +#endif + +/* Support for USE_MALLOC_LOCK */ +#ifdef USE_MALLOC_LOCK + +/* Wait for spin lock */ +static int slwait(int *sl) { + while (InterlockedCompareExchange((void **)sl, (void *)1, (void *)0) != 0) { + Sleep(0); + } + return 0; +} + +/* Release spin lock */ +static int slrelease(int *sl) { + InterlockedExchange(sl, 0); + return 0; +} + +#ifdef NEEDED +/* Spin lock for emulation code */ +static int g_sl; +#endif + +#endif /* USE_MALLOC_LOCK */ + +/* getpagesize for windows */ +static long getpagesize(void) { + static long g_pagesize = 0; + if (!g_pagesize) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + g_pagesize = system_info.dwPageSize; + } + return g_pagesize; +} +static long getregionsize(void) { + static long g_regionsize = 0; + if (!g_regionsize) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + g_regionsize = system_info.dwAllocationGranularity; + } + return g_regionsize; +} + +/* A region list entry */ +typedef struct _region_list_entry { + void *top_allocated; + void *top_committed; + void *top_reserved; + long reserve_size; + struct _region_list_entry *previous; +} region_list_entry; + +/* Allocate and link a region entry in the region list */ +static int region_list_append(region_list_entry **last, void *base_reserved, long reserve_size) { + region_list_entry *next = HeapAlloc(GetProcessHeap(), 0, sizeof(region_list_entry)); + if (!next) { + return FALSE; + } + next->top_allocated = (char *)base_reserved; + next->top_committed = (char *)base_reserved; + next->top_reserved = (char *)base_reserved + reserve_size; + next->reserve_size = reserve_size; + next->previous = *last; + *last = next; + return TRUE; +} +/* Free and unlink the last region entry from the region list */ +static int region_list_remove(region_list_entry **last) { + region_list_entry *previous = (*last)->previous; + if (!HeapFree(GetProcessHeap(), sizeof(region_list_entry), *last)) { + return FALSE; + } + *last = previous; + return TRUE; +} + +#define CEIL(size, to) (((size) + (to) - 1) & ~((to) - 1)) +#define FLOOR(size, to) ((size) & ~((to) - 1)) + +#define SBRK_SCALE 0 +/* 
#define SBRK_SCALE 1 */ +/* #define SBRK_SCALE 2 */ +/* #define SBRK_SCALE 4 */ + +/* sbrk for windows */ +static void *sbrk(long size) { + static long g_pagesize, g_my_pagesize; + static long g_regionsize, g_my_regionsize; + static region_list_entry *g_last; + void *result = (void *)MORECORE_FAILURE; +#ifdef TRACE + printf("sbrk %d\n", size); +#endif +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Wait for spin lock */ + slwait(&g_sl); +#endif + /* First time initialization */ + if (!g_pagesize) { + g_pagesize = getpagesize(); + g_my_pagesize = g_pagesize << SBRK_SCALE; + } + if (!g_regionsize) { + g_regionsize = getregionsize(); + g_my_regionsize = g_regionsize << SBRK_SCALE; + } + if (!g_last) { + if (!region_list_append(&g_last, 0, 0)) { + goto sbrk_exit; + } + } + /* Assert invariants */ + assert(g_last); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && + g_last->top_allocated <= g_last->top_committed); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && + g_last->top_committed <= g_last->top_reserved && + (unsigned)g_last->top_committed % g_pagesize == 0); + assert((unsigned)g_last->top_reserved % g_regionsize == 0); + assert((unsigned)g_last->reserve_size % g_regionsize == 0); + /* Allocation requested? */ + if (size >= 0) { + /* Allocation size is the requested size */ + long allocate_size = size; + /* Compute the size to commit */ + long to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; + /* Do we reach the commit limit? */ + if (to_commit > 0) { + /* Round size to commit */ + long commit_size = CEIL(to_commit, g_my_pagesize); + /* Compute the size to reserve */ + long to_reserve = (char *)g_last->top_committed + commit_size - (char *)g_last->top_reserved; + /* Do we reach the reserve limit? */ + if (to_reserve > 0) { + /* Compute the remaining size to commit in the current region */ + long remaining_commit_size = (char *)g_last->top_reserved - (char *)g_last->top_committed; + if (remaining_commit_size > 0) { + /* Assert preconditions */ + assert((unsigned)g_last->top_committed % g_pagesize == 0); + assert(0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); + { + /* Commit this */ + void *base_committed = VirtualAlloc(g_last->top_committed, remaining_commit_size, + MEM_COMMIT, PAGE_READWRITE); + /* Check returned pointer for consistency */ + if (base_committed != g_last->top_committed) { + goto sbrk_exit; + } + /* Assert postconditions */ + assert((unsigned)base_committed % g_pagesize == 0); +#ifdef TRACE + printf("Commit %p %d\n", base_committed, remaining_commit_size); +#endif + /* Adjust the regions commit top */ + g_last->top_committed = (char *)base_committed + remaining_commit_size; + } + } + { + /* Now we are going to search and reserve. 
*/ + int contiguous = -1; + int found = FALSE; + MEMORY_BASIC_INFORMATION memory_info; + void *base_reserved; + long reserve_size; + do { + /* Assume contiguous memory */ + contiguous = TRUE; + /* Round size to reserve */ + reserve_size = CEIL(to_reserve, g_my_regionsize); + /* Start with the current region's top */ + memory_info.BaseAddress = g_last->top_reserved; + /* Assert preconditions */ + assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); + assert(0 < reserve_size && reserve_size % g_regionsize == 0); + while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { + /* Assert postconditions */ + assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); +#ifdef TRACE + printf("Query %p %d %s\n", memory_info.BaseAddress, memory_info.RegionSize, + memory_info.State == MEM_FREE ? "FREE" : (memory_info.State == MEM_RESERVE ? "RESERVED" : (memory_info.State == MEM_COMMIT ? "COMMITTED" : "?"))); +#endif + /* Region is free, well aligned and big enough: we are done */ + if (memory_info.State == MEM_FREE && + (unsigned)memory_info.BaseAddress % g_regionsize == 0 && + memory_info.RegionSize >= (unsigned)reserve_size) { + found = TRUE; + break; + } + /* From now on we can't get contiguous memory! */ + contiguous = FALSE; + /* Recompute size to reserve */ + reserve_size = CEIL(allocate_size, g_my_regionsize); + memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; + /* Assert preconditions */ + assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); + assert(0 < reserve_size && reserve_size % g_regionsize == 0); + } + /* Search failed? */ + if (!found) { + goto sbrk_exit; + } + /* Assert preconditions */ + assert((unsigned)memory_info.BaseAddress % g_regionsize == 0); + assert(0 < reserve_size && reserve_size % g_regionsize == 0); + /* Try to reserve this */ + base_reserved = VirtualAlloc(memory_info.BaseAddress, reserve_size, + MEM_RESERVE, PAGE_NOACCESS); + if (!base_reserved) { + int rc = GetLastError(); + if (rc != ERROR_INVALID_ADDRESS) { + goto sbrk_exit; + } + } + /* A null pointer signals (hopefully) a race condition with another thread. */ + /* In this case, we try again. */ + } while (!base_reserved); + /* Check returned pointer for consistency */ + if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress) { + goto sbrk_exit; + } + /* Assert postconditions */ + assert((unsigned)base_reserved % g_regionsize == 0); +#ifdef TRACE + printf("Reserve %p %d\n", base_reserved, reserve_size); +#endif + /* Did we get contiguous memory? */ + if (contiguous) { + long start_size = (char *)g_last->top_committed - (char *)g_last->top_allocated; + /* Adjust allocation size */ + allocate_size -= start_size; + /* Adjust the regions allocation top */ + g_last->top_allocated = g_last->top_committed; + /* Recompute the size to commit */ + to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; + /* Round size to commit */ + commit_size = CEIL(to_commit, g_my_pagesize); + } + /* Append the new region to the list */ + if (!region_list_append(&g_last, base_reserved, reserve_size)) { + goto sbrk_exit; + } + /* Didn't we get contiguous memory? 
*/ + if (!contiguous) { + /* Recompute the size to commit */ + to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; + /* Round size to commit */ + commit_size = CEIL(to_commit, g_my_pagesize); + } + } + } + /* Assert preconditions */ + assert((unsigned)g_last->top_committed % g_pagesize == 0); + assert(0 < commit_size && commit_size % g_pagesize == 0); + { + /* Commit this */ + void *base_committed = VirtualAlloc(g_last->top_committed, commit_size, + MEM_COMMIT, PAGE_READWRITE); + /* Check returned pointer for consistency */ + if (base_committed != g_last->top_committed) { + goto sbrk_exit; + } + /* Assert postconditions */ + assert((unsigned)base_committed % g_pagesize == 0); +#ifdef TRACE + printf("Commit %p %d\n", base_committed, commit_size); +#endif + /* Adjust the regions commit top */ + g_last->top_committed = (char *)base_committed + commit_size; + } + } + /* Adjust the regions allocation top */ + g_last->top_allocated = (char *)g_last->top_allocated + allocate_size; + result = (char *)g_last->top_allocated - size; + /* Deallocation requested? */ + } else if (size < 0) { + long deallocate_size = -size; + /* As long as we have a region to release */ + while ((char *)g_last->top_allocated - deallocate_size < (char *)g_last->top_reserved - g_last->reserve_size) { + /* Get the size to release */ + long release_size = g_last->reserve_size; + /* Get the base address */ + void *base_reserved = (char *)g_last->top_reserved - release_size; + /* Assert preconditions */ + assert((unsigned)base_reserved % g_regionsize == 0); + assert(0 < release_size && release_size % g_regionsize == 0); + { + /* Release this */ + int rc = VirtualFree(base_reserved, 0, + MEM_RELEASE); + /* Check returned code for consistency */ + if (!rc) { + goto sbrk_exit; + } +#ifdef TRACE + printf("Release %p %d\n", base_reserved, release_size); +#endif + } + /* Adjust deallocation size */ + deallocate_size -= (char *)g_last->top_allocated - (char *)base_reserved; + /* Remove the old region from the list */ + if (!region_list_remove(&g_last)) { + goto sbrk_exit; + } + } + { + /* Compute the size to decommit */ + long to_decommit = (char *)g_last->top_committed - ((char *)g_last->top_allocated - deallocate_size); + if (to_decommit >= g_my_pagesize) { + /* Compute the size to decommit */ + long decommit_size = FLOOR(to_decommit, g_my_pagesize); + /* Compute the base address */ + void *base_committed = (char *)g_last->top_committed - decommit_size; + /* Assert preconditions */ + assert((unsigned)base_committed % g_pagesize == 0); + assert(0 < decommit_size && decommit_size % g_pagesize == 0); + { + /* Decommit this */ + int rc = VirtualFree((char *)base_committed, decommit_size, + MEM_DECOMMIT); + /* Check returned code for consistency */ + if (!rc) { + goto sbrk_exit; + } +#ifdef TRACE + printf("Decommit %p %d\n", base_committed, decommit_size); +#endif + } + /* Adjust deallocation size and regions commit and allocate top */ + deallocate_size -= (char *)g_last->top_allocated - (char *)base_committed; + g_last->top_committed = base_committed; + g_last->top_allocated = base_committed; + } + } + /* Adjust regions allocate top */ + g_last->top_allocated = (char *)g_last->top_allocated - deallocate_size; + /* Check for underflow */ + if ((char *)g_last->top_reserved - g_last->reserve_size > (char *)g_last->top_allocated || + g_last->top_allocated > g_last->top_committed) { + /* Adjust regions allocate top */ + g_last->top_allocated = (char *)g_last->top_reserved - g_last->reserve_size; + goto 
sbrk_exit; + } + result = g_last->top_allocated; + } + /* Assert invariants */ + assert(g_last); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && + g_last->top_allocated <= g_last->top_committed); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && + g_last->top_committed <= g_last->top_reserved && + (unsigned)g_last->top_committed % g_pagesize == 0); + assert((unsigned)g_last->top_reserved % g_regionsize == 0); + assert((unsigned)g_last->reserve_size % g_regionsize == 0); + +sbrk_exit: +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Release spin lock */ + slrelease(&g_sl); +#endif + return result; +} + +/* mmap for windows */ +static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg) { + static long g_pagesize; + static long g_regionsize; +#ifdef TRACE + printf("mmap %d\n", size); +#endif +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Wait for spin lock */ + slwait(&g_sl); +#endif + /* First time initialization */ + if (!g_pagesize) { + g_pagesize = getpagesize(); + } + if (!g_regionsize) { + g_regionsize = getregionsize(); + } + /* Assert preconditions */ + assert((unsigned)ptr % g_regionsize == 0); + assert(size % g_pagesize == 0); + /* Allocate this */ + ptr = VirtualAlloc(ptr, size, + MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE); + if (!ptr) { + ptr = (void *)MORECORE_FAILURE; + goto mmap_exit; + } + /* Assert postconditions */ + assert((unsigned)ptr % g_regionsize == 0); +#ifdef TRACE + printf("Commit %p %d\n", ptr, size); +#endif +mmap_exit: +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Release spin lock */ + slrelease(&g_sl); +#endif + return ptr; +} + +/* munmap for windows */ +static long munmap(void *ptr, long size) { + static long g_pagesize; + static long g_regionsize; + int rc = MUNMAP_FAILURE; +#ifdef TRACE + printf("munmap %p %d\n", ptr, size); +#endif +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Wait for spin lock */ + slwait(&g_sl); +#endif + /* First time initialization */ + if (!g_pagesize) { + g_pagesize = getpagesize(); + } + if (!g_regionsize) { + g_regionsize = getregionsize(); + } + /* Assert preconditions */ + assert((unsigned)ptr % g_regionsize == 0); + assert(size % g_pagesize == 0); + /* Free this */ + if (!VirtualFree(ptr, 0, + MEM_RELEASE)) { + goto munmap_exit; + } + rc = 0; +#ifdef TRACE + printf("Release %p %d\n", ptr, size); +#endif +munmap_exit: +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Release spin lock */ + slrelease(&g_sl); +#endif + return rc; +} + +static void vminfo(CHUNK_SIZE_T *free, CHUNK_SIZE_T *reserved, CHUNK_SIZE_T *committed) { + MEMORY_BASIC_INFORMATION memory_info; + memory_info.BaseAddress = 0; + *free = *reserved = *committed = 0; + while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { + switch (memory_info.State) { + case MEM_FREE: + *free += memory_info.RegionSize; + break; + case MEM_RESERVE: + *reserved += memory_info.RegionSize; + break; + case MEM_COMMIT: + *committed += memory_info.RegionSize; + break; + } + memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; + } +} + +static int cpuinfo(int whole, CHUNK_SIZE_T *kernel, CHUNK_SIZE_T *user) { + if (whole) { + __int64 creation64, exit64, kernel64, user64; + int rc = GetProcessTimes(GetCurrentProcess(), + (FILETIME *)&creation64, + (FILETIME *)&exit64, + (FILETIME *)&kernel64, + (FILETIME *)&user64); + if (!rc) { + *kernel = 0; + *user = 0; + return 
FALSE;
+        }
+        *kernel = (CHUNK_SIZE_T)(kernel64 / 10000);
+        *user = (CHUNK_SIZE_T)(user64 / 10000);
+        return TRUE;
+    } else {
+        __int64 creation64, exit64, kernel64, user64;
+        int rc = GetThreadTimes(GetCurrentThread(),
+                                (FILETIME *)&creation64,
+                                (FILETIME *)&exit64,
+                                (FILETIME *)&kernel64,
+                                (FILETIME *)&user64);
+        if (!rc) {
+            *kernel = 0;
+            *user = 0;
+            return FALSE;
+        }
+        *kernel = (CHUNK_SIZE_T)(kernel64 / 10000);
+        *user = (CHUNK_SIZE_T)(user64 / 10000);
+        return TRUE;
+    }
+}
+
+#endif /* WIN32 */
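+
+/*
+  (Usage aside -- a sketch, not part of the original sources: building
+  with USE_DL_PREFIX renames the public entry points, which is how this
+  allocator can coexist with a system malloc, as the V2.6.6 history
+  entry below notes. For example, assuming the header is vendored as
+  dlmalloc.h:
+
+    #define USE_DL_PREFIX
+    #include "dlmalloc.h"
+
+    void *p = dlmalloc(64);  // prefixed form of malloc(64)
+    dlfree(p);               // prefixed form of free(p)
+  )
+*/
+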
+/* ------------------------------------------------------------
+History:
+  V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
+  * Fix malloc_state bitmap array misdeclaration
+
+  V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
+  * Allow tuning of FIRST_SORTED_BIN_SIZE
+  * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+  * Better detection and support for non-contiguousness of MORECORE.
+    Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+  * Bypass most of malloc if no frees. Thanks to Emery Berger.
+  * Fix freeing of old top non-contiguous chunk in sysmalloc.
+  * Raised default trim and map thresholds to 256K.
+  * Fix mmap-related #defines. Thanks to Lubos Lunak.
+  * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+  * Branch-free bin calculation
+  * Default trim and mmap thresholds now 256K.
+
+  V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
+  * Introduce independent_comalloc and independent_calloc.
+    Thanks to Michael Pachos for motivation and help.
+  * Make optional .h file available
+  * Allow > 2GB requests on 32bit systems.
+  * new WIN32 sbrk, mmap, munmap, lock code from .
+    Thanks also to Andreas Mueller,
+    and Anonymous.
+  * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+    helping test this.)
+  * memalign: check alignment arg
+  * realloc: don't try to shift chunks backwards, since this
+    leads to more fragmentation in some programs and doesn't
+    seem to help in any others.
+  * Collect all cases in malloc requiring system memory into sYSMALLOc
+  * Use mmap as backup to sbrk
+  * Place all internal state in malloc_state
+  * Introduce fastbins (although similar to 2.5.1)
+  * Many minor tunings and cosmetic improvements
+  * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+  * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+    Thanks to Tony E. Bennett and others.
+  * Include errno.h to support default failure action.
+
+  V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
+  * return null for negative arguments
+  * Added several WIN32 cleanups from Martin C. Fong
+  * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+    (e.g. WIN32 platforms)
+  * Cleanup header file inclusion for WIN32 platforms
+  * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+  * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+    memory allocation routines
+  * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+  * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+    usage of 'assert' in non-WIN32 code
+  * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+    avoid infinite loop
+  * Always call 'fREe()' rather than 'free()'
+
+  V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
+  * Fixed ordering problem with boundary-stamping
+
+  V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
+  * Added pvalloc, as recommended by H.J. Liu
+  * Added 64bit pointer support mainly from Wolfram Gloger
+  * Added anonymously donated WIN32 sbrk emulation
+  * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+  * malloc_extend_top: fix mask error that caused wastage after
+    foreign sbrks
+  * Add linux mremap support code from HJ Liu
+
+  V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
+  * Integrated most documentation with the code.
+  * Add support for mmap, with help from
+    Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+  * Use last_remainder in more cases.
+  * Pack bins using idea from colin@nyx10.cs.du.edu
+  * Use ordered bins instead of best-fit threshold
+  * Eliminate block-local decls to simplify tracing and debugging.
+  * Support another case of realloc via move into top
+  * Fix error occurring when initial sbrk_base not word-aligned.
+  * Rely on page size for units instead of SBRK_UNIT to
+    avoid surprises about sbrk alignment conventions.
+  * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+    (raymond@es.ele.tue.nl) for the suggestion.
+  * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+  * More precautions for cases where other routines call sbrk,
+    courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+  * Added macros etc., allowing use in linux libc from
+    H.J. Lu (hjl@gnu.ai.mit.edu)
+  * Inverted this history list
+
+  V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
+  * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+  * Removed all preallocation code since under current scheme
+    the work required to undo bad preallocations exceeds
+    the work saved in good cases for most test programs.
+  * No longer use return list or unconsolidated bins since
+    no scheme using them consistently outperforms those that don't
+    given above changes.
+  * Use best fit for very large chunks to prevent some worst-cases.
+  * Added some support for debugging
+
+  V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
+  * Removed footers when chunks are in use. Thanks to
+    Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+  V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
+  * Added malloc_trim, with help from Wolfram Gloger
+    (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+  V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
+
+  V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
+  * realloc: try to expand in both directions
+  * malloc: swap order of clean-bin strategy;
+  * realloc: only conditionally expand backwards
+  * Try not to scavenge used bins
+  * Use bin counts as a guide to preallocation
+  * Occasionally bin return list chunks in first scan
+  * Add a few optimizations from colin@nyx10.cs.du.edu
+
+  V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
+  * faster bin computation & slightly different binning
+  * merged all consolidations to one part of malloc proper
+    (eliminating old malloc_find_space & malloc_clean_bin)
+  * Scan 2 returns chunks (not just 1)
+  * Propagate failure in realloc if malloc returns 0
+  * Add stuff to allow compilation on non-ANSI compilers
+    from kpv@research.att.com
+
+  V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
+  * removed potential for odd address access in prev_chunk
+  * removed dependency on getpagesize.h
+  * misc cosmetics and a bit more internal documentation
+  * anticosmetics: mangled names in macros to evade debugger strangeness
+  * tested on sparc, hp-700, dec-mips, rs6000
+    with gcc & native cc (hp, dec only) allowing
+    Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+ + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + +#endif /* MALLOC_270_H */ diff --git a/vendor/rlights.h b/vendor/rlights.h index aba907b5fb..cee06360ee 100644 --- a/vendor/rlights.h +++ b/vendor/rlights.h @@ -167,4 +167,4 @@ void UpdateLightValues(Shader shader, Light light) SetShaderValue(shader, light.colorLoc, color, SHADER_UNIFORM_VEC4); } -#endif // RLIGHTS_IMPLEMENTATION \ No newline at end of file +#endif // RLIGHTS_IMPLEMENTATION From 323c42c12e6d4101063cc9d1a2258bde570ddc5a Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Mon, 27 Apr 2026 20:22:28 -0400 Subject: [PATCH 2/4] almost working --- build.sh | 24 ++-- config/impulse_wars.ini | 12 +- ocean/impulse_wars/benchmark.c | 16 +-- ocean/impulse_wars/binding.c | 204 ++++++++++-------------------- ocean/impulse_wars/binding.h | 177 -------------------------- ocean/impulse_wars/env.h | 79 +++++++----- ocean/impulse_wars/game.h | 26 ++-- ocean/impulse_wars/impulse_wars.c | 10 +- ocean/impulse_wars/map.h | 12 +- ocean/impulse_wars/render.h | 4 +- ocean/impulse_wars/types.h | 10 +- 11 files changed, 183 insertions(+), 391 deletions(-) delete mode 100644 ocean/impulse_wars/binding.h diff --git a/build.sh b/build.sh index 492c033686..6cb06a7403 100755 --- a/build.sh +++ b/build.sh @@ -75,6 +75,7 @@ CLANG_WARN=( -Wno-incompatible-pointer-types-discards-qualifiers -Wno-error=array-parameter ) +CLANG_OPT=() download() { local name=$1 url=$2 @@ -108,14 +109,20 @@ elif [ "$ENV" = "trailer" ]; then OUTPUT_NAME="trailer/trailer" elif [ "$ENV" = "impulse_wars" ]; then SRC_DIR="ocean/$ENV" - if [ "$MODE" = "web" ]; then BOX2D_NAME='box2d-web' - elif [ "$PLATFORM" = "Linux" ]; then BOX2D_NAME='box2d-linux-amd64' - else BOX2D_NAME='box2d-macos-arm64' + if [ "$MODE" = "web" ]; then + BOX2D_NAME='box2d-web' + elif [ "$PLATFORM" = "Linux" ]; then + BOX2D_NAME='box2d-linux-amd64' + else + BOX2D_NAME='box2d-macos-arm64' fi + BOX2D_URL="https://github.com/capnspacehook/box2d/releases/latest/download" download "$BOX2D_NAME" "$BOX2D_URL/$BOX2D_NAME.tar.gz" INCLUDES+=(-I./$BOX2D_NAME/include -I./$BOX2D_NAME/src) LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a") + + CLANG_OPT=(-flto -fno-math-errno -march=native) elif [ -d "ocean/$ENV" ]; then SRC_DIR="ocean/$ENV" else @@ -126,11 +133,11 @@ OUTPUT_NAME=${OUTPUT_NAME:-$ENV} # Standalone environment build if [ -n "$DEBUG" ] || [ "$MODE" = "local" ]; then - CLANG_OPT=(-g -O0 "${CLANG_WARN[@]}" "${SANITIZE_FLAGS[@]}") + CLANG_OPT+=(-g -O0 "${CLANG_WARN[@]}" "${SANITIZE_FLAGS[@]}") NVCC_OPT="-O0 -g" LINK_OPT="-g" else - CLANG_OPT=(-O2 -DNDEBUG "${CLANG_WARN[@]}") + CLANG_OPT+=(-O2 -DNDEBUG "${CLANG_WARN[@]}") NVCC_OPT="-O2 --threads 0" LINK_OPT="-O2" fi @@ -242,6 +249,7 @@ echo "Compiling static library for $ENV..." ${CC:-clang} -c "${CLANG_OPT[@]}" $EXTRA_CFLAGS \ -I. 
-Isrc -I$SRC_DIR -Ivendor \ -I./$RAYLIB_NAME/include -I$CUDA_HOME/include \ + "${INCLUDES[@]}" \ -DPLATFORM_DESKTOP \ -fno-semantic-interposition -fvisibility=hidden \ -fPIC -fopenmp \ @@ -273,7 +281,7 @@ if [ -z "$MODE" ]; then LINK_CMD=( ${CXX:-g++} -shared -fPIC -fopenmp - build/bindings.o "$STATIC_LIB" "$RAYLIB_A" + build/bindings.o "$STATIC_LIB" "${LINK_ARCHIVES[@]}" -L$CUDA_HOME/lib64 $CUDNN_LFLAG $NCCL_LFLAG "${WHEEL_RPATH_FLAGS[@]}" -lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand -lcudnn @@ -298,7 +306,7 @@ elif [ "$MODE" = "cpu" ]; then src/bindings_cpu.cpp -o build/bindings_cpu.o LINK_CMD=( ${CXX:-g++} -shared -fPIC -fopenmp - build/bindings_cpu.o "$STATIC_LIB" "$RAYLIB_A" + build/bindings_cpu.o "$STATIC_LIB" "${LINK_ARCHIVES[@]}" -lm -lpthread $OMP_LIB $LINK_OPT "${SHARED_LDFLAGS[@]}" -o "$OUTPUT" @@ -317,7 +325,7 @@ elif [ "$MODE" = "profile" ]; then $PRECISION \ -Xcompiler=-fopenmp \ tests/profile_kernels.cu vendor/ini.c \ - "$STATIC_LIB" "$RAYLIB_A" \ + "$STATIC_LIB" "${LINK_ARCHIVES[@]}" \ -lnccl -lnvidia-ml -lcublas -lcurand -lcudnn \ -lGL -lm -lpthread $OMP_LIB \ -o profile diff --git a/config/impulse_wars.ini b/config/impulse_wars.ini index 3e7c7f7bbb..772c08306f 100644 --- a/config/impulse_wars.ini +++ b/config/impulse_wars.ini @@ -26,6 +26,17 @@ sitting_duck = False continuous = False is_training = True +reward_win = 2.0 +reward_self_kill = -1.0 +reward_enemy_death = 1.0 +reward_enemy_kill = 1.0 +reward_death = 0.0 +reward_energy_emptied = -0.75 +reward_weapon_pickup = 0.5 +reward_shield_break = 0.5 +reward_shot_hit_coef = 0.005 +reward_explosion_hit_coef = 0.005 + [train] total_timesteps = 1_000_000_000 checkpoint_interval = 250 @@ -36,7 +47,6 @@ compile = False compile_mode = reduce-overhead compile_fullgraph = False - [sweep] downsample = 10 max_cost = 900 diff --git a/ocean/impulse_wars/benchmark.c b/ocean/impulse_wars/benchmark.c index 3071bf91b3..c11c1e2050 100644 --- a/ocean/impulse_wars/benchmark.c +++ b/ocean/impulse_wars/benchmark.c @@ -1,16 +1,16 @@ #include "env.h" void randActions(iwEnv *e) { - // e->lastRandState = e->randState; + // e->lastRandState = e->rng; uint8_t actionOffset = 0; for (uint8_t i = 0; i < e->numDrones; i++) { - e->actions[actionOffset + 0] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 1] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 2] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 3] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 4] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 5] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 6] = randFloat(&e->randState, -1.0f, 1.0f); + e->actions[actionOffset + 0] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 1] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 2] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 3] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 4] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 5] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 6] = randFloat(&e->rng, -1.0f, 1.0f); actionOffset += CONTINUOUS_ACTION_SIZE; } diff --git a/ocean/impulse_wars/binding.c b/ocean/impulse_wars/binding.c index 28b429773b..170421963b 100644 --- a/ocean/impulse_wars/binding.c +++ b/ocean/impulse_wars/binding.c @@ -1,121 +1,51 @@ -#include - #include "env.h" -static PyObject *get_consts(PyObject *self, PyObject *args); +#define OBS_SIZE 998 // for 2 drones 
(players) +// actions: +// 9: move, noop + 8 directions +// 17: aim, noop + 16 directions +// 2: shoot or not +// 2: brake or not +// 2: burst or not +#define NUM_ATNS 5 +#define ACT_SIZES {9, 17, 2, 2, 2} +#define OBS_TENSOR_T FloatTensor #define Env iwEnv -#define MY_SHARED -#define MY_METHODS {"get_consts", get_consts, METH_VARARGS, "Get constants"} - -#include "../env_binding.h" - -#define setDictVal(dict, key, val) \ - if (PyDict_SetItemString(dict, key, PyLong_FromLong(val)) < 0) { \ - PyErr_SetString(PyExc_RuntimeError, "Failed to set " key " in dict"); \ - return NULL; \ - } - -static PyObject *get_consts(PyObject *self, PyObject *args) { - PyObject *dronesArg = PyTuple_GetItem(args, 0); - if (!PyObject_TypeCheck(dronesArg, &PyLong_Type)) { - PyErr_SetString(PyExc_TypeError, "num_drones must be an integer"); - return NULL; - } - const uint8_t numDrones = (uint8_t)PyLong_AsLong(dronesArg); - - PyObject *dict = PyDict_New(); - if (PyErr_Occurred()) { - return NULL; - } - - const uint16_t droneObsOffset = ENEMY_DRONE_OBS_OFFSET + ((numDrones - 1) * ENEMY_DRONE_OBS_SIZE); - - setDictVal(dict, "obsBytes", obsBytes(numDrones)); - setDictVal(dict, "mapObsSize", MAP_OBS_SIZE); - setDictVal(dict, "discreteObsSize", discreteObsSize(numDrones)); - setDictVal(dict, "continuousObsSize", continuousObsSize(numDrones)); - setDictVal(dict, "continuousObsBytes", continuousObsSize(numDrones) * sizeof(float)); - setDictVal(dict, "wallTypes", NUM_WALL_TYPES); - setDictVal(dict, "weaponTypes", NUM_WEAPONS + 1); - setDictVal(dict, "mapObsRows", MAP_OBS_ROWS); - setDictVal(dict, "mapObsColumns", MAP_OBS_COLUMNS); - setDictVal(dict, "continuousObsOffset", alignedSize(MAP_OBS_SIZE, sizeof(float))); - setDictVal(dict, "numNearWallObs", NUM_NEAR_WALL_OBS); - setDictVal(dict, "nearWallTypesObsOffset", NEAR_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "nearWallPosObsSize", NEAR_WALL_POS_OBS_SIZE); - setDictVal(dict, "nearWallObsSize", NEAR_WALL_OBS_SIZE); - setDictVal(dict, "nearWallPosObsOffset", NEAR_WALL_POS_OBS_OFFSET); - setDictVal(dict, "numFloatingWallObs", NUM_FLOATING_WALL_OBS); - setDictVal(dict, "floatingWallTypesObsOffset", FLOATING_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "floatingWallInfoObsSize", FLOATING_WALL_INFO_OBS_SIZE); - setDictVal(dict, "floatingWallObsSize", FLOATING_WALL_OBS_SIZE); - setDictVal(dict, "floatingWallInfoObsOffset", FLOATING_WALL_INFO_OBS_OFFSET); - setDictVal(dict, "numWeaponPickupObs", NUM_WEAPON_PICKUP_OBS); - setDictVal(dict, "weaponPickupTypesObsOffset", WEAPON_PICKUP_WEAPONS_OBS_OFFSET); - setDictVal(dict, "weaponPickupPosObsSize", WEAPON_PICKUP_POS_OBS_SIZE); - setDictVal(dict, "weaponPickupObsSize", WEAPON_PICKUP_OBS_SIZE); - setDictVal(dict, "weaponPickupPosObsOffset", WEAPON_PICKUP_POS_OBS_OFFSET); - setDictVal(dict, "numProjectileObs", NUM_PROJECTILE_OBS); - setDictVal(dict, "projectileDroneObsOffset", PROJECTILE_DRONE_OBS_OFFSET); - setDictVal(dict, "projectileTypesObsOffset", PROJECTILE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "projectileInfoObsSize", PROJECTILE_INFO_OBS_SIZE); - setDictVal(dict, "projectileObsSize", PROJECTILE_OBS_SIZE); - setDictVal(dict, "projectileInfoObsOffset", PROJECTILE_INFO_OBS_OFFSET); - setDictVal(dict, "enemyDroneWeaponsObsOffset", ENEMY_DRONE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsOffset", ENEMY_DRONE_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsSize", ENEMY_DRONE_OBS_SIZE); - setDictVal(dict, "droneObsOffset", droneObsOffset); - setDictVal(dict, "droneObsSize", DRONE_OBS_SIZE); - setDictVal(dict, "miscObsSize", 
MISC_OBS_SIZE); - setDictVal(dict, "miscObsOffset", droneObsOffset + DRONE_OBS_SIZE); +#include "vecenv.h" - setDictVal(dict, "maxDrones", MAX_DRONES); - setDictVal(dict, "contActionsSize", CONTINUOUS_ACTION_SIZE); +#define DICTGET(key) dict_get(kwargs, key)->value - return dict; -} - -static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { - VecEnv *ve = unpack_vecenv(args); - initMaps(ve->envs[0]); - - for (uint16_t i = 0; i < ve->num_envs; i++) { - iwEnv *e = (iwEnv *)ve->envs[i]; - setupEnv(e); - } - - return Py_None; -} - -static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { +void my_init(Env* env, Dict* kwargs) { initEnv( - e, - (uint8_t)unpack(kwargs, "num_drones"), - (uint8_t)unpack(kwargs, "num_agents"), - (int8_t)unpack(kwargs, "map_idx"), - (uint64_t)unpack(kwargs, "seed"), - (bool)unpack(kwargs, "enable_teams"), - (bool)unpack(kwargs, "sitting_duck"), - (bool)unpack(kwargs, "is_training"), - (bool)unpack(kwargs, "continuous") + env, + 2, + 1, + -1, + 0, + (bool)DICTGET("enable_teams"), + (bool)DICTGET("sitting_duck"), + (bool)DICTGET("is_training"), + (bool)DICTGET("continuous") ); + setRewards( - e, - (float)unpack(kwargs, "reward_win"), - (float)unpack(kwargs, "reward_self_kill"), - (float)unpack(kwargs, "reward_enemy_death"), - (float)unpack(kwargs, "reward_enemy_kill"), + env, + (float)DICTGET("reward_win"), + (float)DICTGET("reward_self_kill"), + (float)DICTGET("reward_enemy_death"), + (float)DICTGET("reward_enemy_kill"), 0.0f, // teammate death punishment 0.0f, // teammate kill punishment - (float)unpack(kwargs, "reward_death"), - (float)unpack(kwargs, "reward_energy_emptied"), - (float)unpack(kwargs, "reward_weapon_pickup"), - (float)unpack(kwargs, "reward_shield_break"), - (float)unpack(kwargs, "reward_shot_hit_coef"), - (float)unpack(kwargs, "reward_explosion_hit_coef") + (float)DICTGET("reward_death"), + (float)DICTGET("reward_energy_emptied"), + (float)DICTGET("reward_weapon_pickup"), + (float)DICTGET("reward_shield_break"), + (float)DICTGET("reward_shot_hit_coef"), + (float)DICTGET("reward_explosion_hit_coef") ); - return 0; + + initMaps(env); } #define _LOG_BUF_SIZE 128 @@ -130,48 +60,46 @@ char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, cons return buf; } -static int my_log(PyObject *dict, Log *log) { - assign_to_dict(dict, "episode_length", log->length); - assign_to_dict(dict, "ties", log->ties); +void my_log(Log *log, Dict *out) { + dict_set(out, "episode_length", log->length); + dict_set(out, "ties", log->ties); - assign_to_dict(dict, "perf", log->stats[0].wins); - assign_to_dict(dict, "score", log->stats[0].wins); + dict_set(out, "perf", log->stats[0].wins); + dict_set(out, "score", log->stats[0].wins); char buf[_LOG_BUF_SIZE] = {0}; for (uint8_t i = 0; i < MAX_DRONES; i++) { - assign_to_dict(dict, droneLog(buf, i, "returns"), log->stats[i].returns); - assign_to_dict(dict, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); - assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); - assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); - assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); - assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); - assign_to_dict(dict, droneLog(buf, 
i, "own_shield_broken"), log->stats[i].ownShieldBroken); - assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); - assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); - assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); - assign_to_dict(dict, droneLog(buf, i, "wins"), log->stats[i].wins); + dict_set(out, droneLog(buf, i, "returns"), log->stats[i].returns); + dict_set(out, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); + dict_set(out, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); + dict_set(out, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); + dict_set(out, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); + dict_set(out, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); + dict_set(out, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); + dict_set(out, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); + dict_set(out, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); + dict_set(out, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); + dict_set(out, droneLog(buf, i, "kills"), log->stats[i].kills); + dict_set(out, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); + dict_set(out, droneLog(buf, i, "wins"), log->stats[i].wins); // useful for debugging weapon balance, but really slows down // sweeps due to adding a ton of extra logging data // // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); + // dict_set(out, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); + // dict_set(out, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); + // dict_set(out, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); + // dict_set(out, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); + // dict_set(out, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); + // dict_set(out, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); // } - assign_to_dict(dict, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); - assign_to_dict(dict, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); - assign_to_dict(dict, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); - assign_to_dict(dict, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); + dict_set(out, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); + dict_set(out, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); + dict_set(out, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); + dict_set(out, droneLog(buf, i, "total_own_shots_taken"), 
log->stats[i].totalOwnShotsTaken); + dict_set(out, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); + dict_set(out, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); } - - return 0; } diff --git a/ocean/impulse_wars/binding.h b/ocean/impulse_wars/binding.h deleted file mode 100644 index 28b429773b..0000000000 --- a/ocean/impulse_wars/binding.h +++ /dev/null @@ -1,177 +0,0 @@ -#include - -#include "env.h" - -static PyObject *get_consts(PyObject *self, PyObject *args); - -#define Env iwEnv -#define MY_SHARED -#define MY_METHODS {"get_consts", get_consts, METH_VARARGS, "Get constants"} - -#include "../env_binding.h" - -#define setDictVal(dict, key, val) \ - if (PyDict_SetItemString(dict, key, PyLong_FromLong(val)) < 0) { \ - PyErr_SetString(PyExc_RuntimeError, "Failed to set " key " in dict"); \ - return NULL; \ - } - -static PyObject *get_consts(PyObject *self, PyObject *args) { - PyObject *dronesArg = PyTuple_GetItem(args, 0); - if (!PyObject_TypeCheck(dronesArg, &PyLong_Type)) { - PyErr_SetString(PyExc_TypeError, "num_drones must be an integer"); - return NULL; - } - const uint8_t numDrones = (uint8_t)PyLong_AsLong(dronesArg); - - PyObject *dict = PyDict_New(); - if (PyErr_Occurred()) { - return NULL; - } - - const uint16_t droneObsOffset = ENEMY_DRONE_OBS_OFFSET + ((numDrones - 1) * ENEMY_DRONE_OBS_SIZE); - - setDictVal(dict, "obsBytes", obsBytes(numDrones)); - setDictVal(dict, "mapObsSize", MAP_OBS_SIZE); - setDictVal(dict, "discreteObsSize", discreteObsSize(numDrones)); - setDictVal(dict, "continuousObsSize", continuousObsSize(numDrones)); - setDictVal(dict, "continuousObsBytes", continuousObsSize(numDrones) * sizeof(float)); - setDictVal(dict, "wallTypes", NUM_WALL_TYPES); - setDictVal(dict, "weaponTypes", NUM_WEAPONS + 1); - setDictVal(dict, "mapObsRows", MAP_OBS_ROWS); - setDictVal(dict, "mapObsColumns", MAP_OBS_COLUMNS); - setDictVal(dict, "continuousObsOffset", alignedSize(MAP_OBS_SIZE, sizeof(float))); - setDictVal(dict, "numNearWallObs", NUM_NEAR_WALL_OBS); - setDictVal(dict, "nearWallTypesObsOffset", NEAR_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "nearWallPosObsSize", NEAR_WALL_POS_OBS_SIZE); - setDictVal(dict, "nearWallObsSize", NEAR_WALL_OBS_SIZE); - setDictVal(dict, "nearWallPosObsOffset", NEAR_WALL_POS_OBS_OFFSET); - setDictVal(dict, "numFloatingWallObs", NUM_FLOATING_WALL_OBS); - setDictVal(dict, "floatingWallTypesObsOffset", FLOATING_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "floatingWallInfoObsSize", FLOATING_WALL_INFO_OBS_SIZE); - setDictVal(dict, "floatingWallObsSize", FLOATING_WALL_OBS_SIZE); - setDictVal(dict, "floatingWallInfoObsOffset", FLOATING_WALL_INFO_OBS_OFFSET); - setDictVal(dict, "numWeaponPickupObs", NUM_WEAPON_PICKUP_OBS); - setDictVal(dict, "weaponPickupTypesObsOffset", WEAPON_PICKUP_WEAPONS_OBS_OFFSET); - setDictVal(dict, "weaponPickupPosObsSize", WEAPON_PICKUP_POS_OBS_SIZE); - setDictVal(dict, "weaponPickupObsSize", WEAPON_PICKUP_OBS_SIZE); - setDictVal(dict, "weaponPickupPosObsOffset", WEAPON_PICKUP_POS_OBS_OFFSET); - setDictVal(dict, "numProjectileObs", NUM_PROJECTILE_OBS); - setDictVal(dict, "projectileDroneObsOffset", PROJECTILE_DRONE_OBS_OFFSET); - setDictVal(dict, "projectileTypesObsOffset", PROJECTILE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "projectileInfoObsSize", PROJECTILE_INFO_OBS_SIZE); - setDictVal(dict, "projectileObsSize", PROJECTILE_OBS_SIZE); - setDictVal(dict, "projectileInfoObsOffset", PROJECTILE_INFO_OBS_OFFSET); - setDictVal(dict, "enemyDroneWeaponsObsOffset", 
ENEMY_DRONE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsOffset", ENEMY_DRONE_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsSize", ENEMY_DRONE_OBS_SIZE); - setDictVal(dict, "droneObsOffset", droneObsOffset); - setDictVal(dict, "droneObsSize", DRONE_OBS_SIZE); - setDictVal(dict, "miscObsSize", MISC_OBS_SIZE); - setDictVal(dict, "miscObsOffset", droneObsOffset + DRONE_OBS_SIZE); - - setDictVal(dict, "maxDrones", MAX_DRONES); - setDictVal(dict, "contActionsSize", CONTINUOUS_ACTION_SIZE); - - return dict; -} - -static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { - VecEnv *ve = unpack_vecenv(args); - initMaps(ve->envs[0]); - - for (uint16_t i = 0; i < ve->num_envs; i++) { - iwEnv *e = (iwEnv *)ve->envs[i]; - setupEnv(e); - } - - return Py_None; -} - -static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { - initEnv( - e, - (uint8_t)unpack(kwargs, "num_drones"), - (uint8_t)unpack(kwargs, "num_agents"), - (int8_t)unpack(kwargs, "map_idx"), - (uint64_t)unpack(kwargs, "seed"), - (bool)unpack(kwargs, "enable_teams"), - (bool)unpack(kwargs, "sitting_duck"), - (bool)unpack(kwargs, "is_training"), - (bool)unpack(kwargs, "continuous") - ); - setRewards( - e, - (float)unpack(kwargs, "reward_win"), - (float)unpack(kwargs, "reward_self_kill"), - (float)unpack(kwargs, "reward_enemy_death"), - (float)unpack(kwargs, "reward_enemy_kill"), - 0.0f, // teammate death punishment - 0.0f, // teammate kill punishment - (float)unpack(kwargs, "reward_death"), - (float)unpack(kwargs, "reward_energy_emptied"), - (float)unpack(kwargs, "reward_weapon_pickup"), - (float)unpack(kwargs, "reward_shield_break"), - (float)unpack(kwargs, "reward_shot_hit_coef"), - (float)unpack(kwargs, "reward_explosion_hit_coef") - ); - return 0; -} - -#define _LOG_BUF_SIZE 128 - -char *droneLog(char *buf, const uint8_t droneIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s", droneIdx, name); - return buf; -} - -char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s_%s", droneIdx, weaponNames[weaponIdx], name); - return buf; -} - -static int my_log(PyObject *dict, Log *log) { - assign_to_dict(dict, "episode_length", log->length); - assign_to_dict(dict, "ties", log->ties); - - assign_to_dict(dict, "perf", log->stats[0].wins); - assign_to_dict(dict, "score", log->stats[0].wins); - - char buf[_LOG_BUF_SIZE] = {0}; - for (uint8_t i = 0; i < MAX_DRONES; i++) { - assign_to_dict(dict, droneLog(buf, i, "returns"), log->stats[i].returns); - assign_to_dict(dict, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); - assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); - assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); - assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); - assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); - assign_to_dict(dict, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); - assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); - assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); - assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); - assign_to_dict(dict, 
droneLog(buf, i, "wins"), log->stats[i].wins); - - // useful for debugging weapon balance, but really slows down - // sweeps due to adding a ton of extra logging data - // - // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); - // } - - assign_to_dict(dict, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); - assign_to_dict(dict, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); - assign_to_dict(dict, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); - assign_to_dict(dict, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); - } - - return 0; -} diff --git a/ocean/impulse_wars/env.h b/ocean/impulse_wars/env.h index 2162b50144..3fc90be472 100644 --- a/ocean/impulse_wars/env.h +++ b/ocean/impulse_wars/env.h @@ -292,7 +292,7 @@ void computeNearObs(iwEnv *e, const droneEntity *drone, const uint16_t discreteO } void computeObs(iwEnv *e) { - for (uint8_t agentIdx = 0; agentIdx < e->numAgents; agentIdx++) { + for (uint8_t agentIdx = 0; agentIdx < e->num_agents; agentIdx++) { droneEntity *agentDrone = safe_array_get_at(e->drones, agentIdx); // if the drone is dead, only compute observations if it died // this step and it isn't out of bounds @@ -478,7 +478,7 @@ void setupEnv(iwEnv *e) { if (!e->isTraining) { firstMap = 1; } - mapIdx = randInt(&e->randState, firstMap, NUM_MAPS - 1); + mapIdx = randInt(&e->rng, firstMap, NUM_MAPS - 1); } DEBUG_LOGF("setting up map %d", mapIdx); setupMap(e, mapIdx); @@ -493,7 +493,7 @@ void setupEnv(iwEnv *e) { DEBUG_LOG("creating weapon pickups"); // start spawning pickups in a random quadrant - e->lastSpawnQuad = randInt(&e->randState, 0, 3); + e->lastSpawnQuad = randInt(&e->rng, 0, 3); for (uint8_t i = 0; i < maps[mapIdx]->weaponPickups; i++) { createWeaponPickup(e); } @@ -530,7 +530,7 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui DEBUG_LOGF("seed: %lu", seed); e->numDrones = numDrones; - e->numAgents = numAgents; + e->num_agents = numAgents; e->teamsEnabled = enableTeams; e->numTeams = numDrones; if (e->teamsEnabled) { @@ -557,11 +557,10 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui e->continuousActions = continuousActions; - // TODO: remove when puffer bindings add truncations - e->truncations = fastCalloc(numDrones, sizeof(uint8_t)); + //e->truncations = fastCalloc(numDrones, sizeof(uint8_t)); setEnvFrameRate(e); - e->randState = seed; + e->rng = seed; e->needsReset = false; b2WorldDef worldDef = b2DefaultWorldDef(); @@ -620,9 +619,9 @@ void setRewards(iwEnv *e, float winReward, float selfKillPunishment, float enemy void clearEnv(iwEnv *e) { // rewards get cleared in stepEnv every step - // memset(e->masks, 1, e->numAgents * sizeof(uint8_t)); - 
memset(e->terminals, 0x0, e->numAgents * sizeof(uint8_t));
-    memset(e->truncations, 0x0, e->numAgents * sizeof(uint8_t));
+    // memset(e->masks, 1, e->num_agents * sizeof(uint8_t));
+    memset(e->terminals, 0.0f, e->num_agents * sizeof(uint8_t));
+    //memset(e->truncations, 0x0, e->num_agents * sizeof(uint8_t));
 
     e->episodeLength = 0;
     memset(e->stats, 0x0, sizeof(e->stats));
@@ -749,7 +748,7 @@ float computeReward(iwEnv *e, droneEntity *drone) {
         reward += e->shieldBreakReward;
     }
 
-    if (e->numAgents == e->numDrones) {
+    if (e->num_agents == e->numDrones) {
         if (drone->stepInfo.shotTaken[i] != 0) {
             reward -= drone->stepInfo.shotTaken[i] * e->shotHitRewardCoef;
         }
@@ -791,7 +790,7 @@ float computeReward(iwEnv *e, droneEntity *drone) {
 const float REWARD_EPS = 1.0e-6f;
 
 void computeRewards(iwEnv *e, const bool roundOver, const int8_t winner, const int8_t winningTeam) {
-    if (roundOver && winner != -1 && winner < e->numAgents) {
+    if (roundOver && winner != -1 && winner < e->num_agents) {
         e->rewards[winner] += e->winReward;
     }
@@ -807,7 +806,7 @@ void computeRewards(iwEnv *e, const bool roundOver, const int8_t winner, const i
                 reward += e->selfKillPunishment;
             }
         }
-        if (i < e->numAgents) {
+        if (i < e->num_agents) {
             e->rewards[i] += reward;
         }
         e->stats[i].returns += reward;
@@ -821,23 +820,39 @@ static inline bool isActionNoop(const b2Vec2 action) {
 agentActions _computeActions(iwEnv *e, droneEntity *drone, const agentActions *manualActions) {
     agentActions actions = {0};
 
-    const uint8_t offset = drone->idx * CONTINUOUS_ACTION_SIZE;
     if (manualActions == NULL) {
-        actions.move = (b2Vec2){.x = e->actions[offset + 0], .y = e->actions[offset + 1]};
-        actions.aim = (b2Vec2){.x = e->actions[offset + 2], .y = e->actions[offset + 3]};
+        float (*envActions)[7] = (float(*)[7])e->actions;
+
+        uint8_t move = envActions[drone->idx][0];
+        // 0 is no-op for both move and aim
+        ASSERT(move <= 8);
+        if (move != 0) {
+            move--;
+            actions.move.x = discMoveToContMoveMap[0][move];
+            actions.move.y = discMoveToContMoveMap[1][move];
+        }
+        // aim is the second discrete action component, not the first
+        uint8_t aim = envActions[drone->idx][1];
+        ASSERT(aim <= 16);
+        if (aim != 0) {
+            aim--;
+            actions.aim.x = discAimToContAimMap[0][aim];
+            actions.aim.y = discAimToContAimMap[1][aim];
+        }
+
         if (e->continuousActions) {
            actions.move.x = tanhf(actions.move.x);
            actions.move.y = tanhf(actions.move.y);
            actions.aim.x = tanhf(actions.aim.x);
            actions.aim.y = tanhf(actions.aim.y);
        }
-        actions.chargingWeapon = e->actions[offset + 4] > 0.0f;
+
+        actions.chargingWeapon = envActions[drone->idx][4] > 0.0f;
         actions.shoot = actions.chargingWeapon;
         if (!actions.chargingWeapon && drone->chargingWeapon) {
             actions.shoot = true;
         }
-        actions.brake = e->actions[offset + 5] > 0.0f;
-        actions.chargingBurst = e->actions[offset + 6] > 0.0f;
+        actions.brake = envActions[drone->idx][5] > 0.0f;
+        actions.chargingBurst = envActions[drone->idx][6] > 0.0f;
     } else {
         actions.move = manualActions->move;
         actions.aim = manualActions->aim;
@@ -1065,7 +1080,7 @@ void stepEnv(iwEnv *e) {
             continue;
         }
 
-        if (i < e->numAgents) {
+        if (i < e->num_agents) {
             stepActions[i] = computeActions(e, drone, NULL);
         } else {
             const agentActions scriptedActions = scriptedAgentActions(e, drone);
@@ -1074,7 +1089,7 @@ void stepEnv(iwEnv *e) {
     }
 
     // reset reward buffer
-    memset(e->rewards, 0x0, e->numAgents * sizeof(float));
+    memset(e->rewards, 0x0, e->num_agents * sizeof(float));
 
     for (int i = 0; i < e->frameSkip; i++) {
 #ifdef __EMSCRIPTEN__
@@ -1154,7 +1169,7 @@ void stepEnv(iwEnv *e) {
 
     // handle sudden death
     e->stepsLeft = max(e->stepsLeft - 1, 0);
-    if 
((!e->isTraining || e->numDrones == e->numAgents) && e->stepsLeft == 0) { + if ((!e->isTraining || e->numDrones == e->num_agents) && e->stepsLeft == 0) { e->suddenDeathSteps = max(e->suddenDeathSteps - 1, 0); if (e->suddenDeathSteps == 0) { DEBUG_LOG("placing sudden death walls"); @@ -1190,9 +1205,9 @@ void stepEnv(iwEnv *e) { } } else { deadDrones++; - if (i < e->numAgents) { + if (i < e->num_agents) { if (drone->diedThisStep) { - e->terminals[i] = 1; + e->terminals[i] = 1.0f; } // else { // e->masks[i] = 0; @@ -1212,7 +1227,7 @@ void stepEnv(iwEnv *e) { } // if the enemy drone(s) are scripted don't enable sudden death // so that the agent has to work for victories - if (e->isTraining && e->numDrones != e->numAgents && e->stepsLeft == 0) { + if (e->isTraining && e->numDrones != e->num_agents && e->stepsLeft == 0) { roundOver = true; lastAliveTeam = -1; } @@ -1226,13 +1241,13 @@ void stepEnv(iwEnv *e) { } if (roundOver) { - if (e->numDrones != e->numAgents && e->stepsLeft == 0) { - DEBUG_LOG("truncating episode"); - memset(e->truncations, 1, e->numAgents * sizeof(uint8_t)); - } else { - DEBUG_LOG("terminating episode"); - memset(e->terminals, 1, e->numAgents * sizeof(uint8_t)); - } + // if (e->numDrones != e->num_agents && e->stepsLeft == 0) { + // DEBUG_LOG("truncating episode"); + // memset(e->truncations, 1, e->num_agents * sizeof(uint8_t)); + // } + + DEBUG_LOG("terminating episode"); + memset(e->terminals, 1.0f, e->num_agents * sizeof(float)); Log log = {0}; log.length = e->episodeLength; diff --git a/ocean/impulse_wars/game.h b/ocean/impulse_wars/game.h index 65e76f12e5..c47539f906 100644 --- a/ocean/impulse_wars/game.h +++ b/ocean/impulse_wars/game.h @@ -336,14 +336,14 @@ bool findOpenPos(iwEnv *e, const enum shapeCategory shapeType, b2Vec2 *emptyPos, uint16_t cellIdx; if (quad == -1) { - cellIdx = randInt(&e->randState, 0, nCells); + cellIdx = randInt(&e->rng, 0, nCells); } else { const float minX = e->map->spawnQuads[quad].min.x; const float minY = e->map->spawnQuads[quad].min.y; const float maxX = e->map->spawnQuads[quad].max.x; const float maxY = e->map->spawnQuads[quad].max.y; - b2Vec2 randPos = {.x = randFloat(&e->randState, minX, maxX), .y = randFloat(&e->randState, minY, maxY)}; + b2Vec2 randPos = {.x = randFloat(&e->rng, minX, maxX), .y = randFloat(&e->rng, minY, maxY)}; cellIdx = entityPosToCellIdx(e, randPos); } if (bitTest(checkedCells, cellIdx)) { @@ -551,7 +551,7 @@ enum weaponType randWeaponPickupType(iwEnv *e) { totalWeight += spawnWeights[i - 1]; } - const float randPick = randFloat(&e->randState, 0.0f, totalWeight); + const float randPick = randFloat(&e->rng, 0.0f, totalWeight); float cumulativeWeight = 0.0f; enum weaponType type = STANDARD_WEAPON; for (uint8_t i = 1; i < NUM_WEAPONS; i++) { @@ -718,7 +718,7 @@ void createDrone(iwEnv *e, const uint8_t idx) { // doing this while training will result in much slower learning // due to drones starting much farther apart if (e->lastSpawnQuad == -1) { - spawnQuad = randInt(&e->randState, 0, 3); + spawnQuad = randInt(&e->rng, 0, 3); } else if (e->numDrones == 2) { spawnQuad = 3 - e->lastSpawnQuad; } else { @@ -787,10 +787,10 @@ void droneAddEnergy(droneEntity *drone, float energy) { } void createDronePiece(iwEnv *e, droneEntity *drone, const bool fromShield) { - const float distance = randFloat(&e->randState, DRONE_PIECE_MIN_DISTANCE, DRONE_PIECE_MAX_DISTANCE); - const b2Vec2 direction = {.x = randFloat(&e->randState, -1.0f, 1.0f), .y = randFloat(&e->randState, -1.0f, 1.0f)}; + const float distance = randFloat(&e->rng, 
DRONE_PIECE_MIN_DISTANCE, DRONE_PIECE_MAX_DISTANCE); + const b2Vec2 direction = {.x = randFloat(&e->rng, -1.0f, 1.0f), .y = randFloat(&e->rng, -1.0f, 1.0f)}; const b2Vec2 pos = b2MulAdd(drone->pos, distance, direction); - const b2Rot rot = b2MakeRot(randFloat(&e->randState, -PI, PI)); + const b2Rot rot = b2MakeRot(randFloat(&e->rng, -PI, PI)); dronePieceEntity *piece = fastCalloc(1, sizeof(dronePieceEntity)); piece->droneIdx = drone->idx; @@ -810,9 +810,9 @@ void createDronePiece(iwEnv *e, droneEntity *drone, const bool fromShield) { pieceBodyDef.linearDamping = DRONE_PIECE_LINEAR_DAMPING; pieceBodyDef.angularDamping = DRONE_PIECE_ANGULAR_DAMPING; const float bonus = 1.0f + min(b2Length(drone->velocity) / 15.0f, 5.0f); - const float speed = randFloat(&e->randState, DRONE_PIECE_MIN_SPEED, DRONE_PIECE_MAX_SPEED) * bonus; + const float speed = randFloat(&e->rng, DRONE_PIECE_MIN_SPEED, DRONE_PIECE_MAX_SPEED) * bonus; pieceBodyDef.linearVelocity = b2MulSV(speed, direction); - pieceBodyDef.angularVelocity = randFloat(&e->randState, -PI, PI); + pieceBodyDef.angularVelocity = randFloat(&e->rng, -PI, PI); pieceBodyDef.userData = ent; piece->bodyID = b2CreateBody(e->worldID, &pieceBodyDef); @@ -1145,8 +1145,8 @@ void createProjectile(iwEnv *e, droneEntity *drone, const b2Vec2 normAim) { b2Vec2 forwardVel = b2MulSV(b2Dot(drone->velocity, normAim), normAim); b2Vec2 lateralVel = b2Sub(drone->velocity, forwardVel); lateralVel = b2MulSV(projectileShapeDef.density * DRONE_MOVE_AIM_COEF, lateralVel); - b2Vec2 aim = weaponAdjustAim(&e->randState, drone->weaponInfo->type, drone->heat, normAim); - b2Vec2 fire = b2MulAdd(lateralVel, weaponFire(&e->randState, drone->weaponInfo->type), aim); + b2Vec2 aim = weaponAdjustAim(&e->rng, drone->weaponInfo->type, drone->heat, normAim); + b2Vec2 fire = b2MulAdd(lateralVel, weaponFire(&e->rng, drone->weaponInfo->type), aim); b2Body_ApplyLinearImpulseToCenter(projectileBodyID, fire, true); projectileEntity *projectile = fastCalloc(1, sizeof(projectileEntity)); @@ -1387,8 +1387,8 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { // if the direction is zero, the magnitude cannot be calculated // correctly so set the direction randomly if (b2VecEqual(direction, b2Vec2_zero)) { - direction.x = randFloat(&ctx->e->randState, -1.0f, 1.0f); - direction.y = randFloat(&ctx->e->randState, -1.0f, 1.0f); + direction.x = randFloat(&ctx->e->rng, -1.0f, 1.0f); + direction.y = randFloat(&ctx->e->rng, -1.0f, 1.0f); direction = b2Normalize(direction); } diff --git a/ocean/impulse_wars/impulse_wars.c b/ocean/impulse_wars/impulse_wars.c index 1dfbd1d5c3..26db5ae5a6 100644 --- a/ocean/impulse_wars/impulse_wars.c +++ b/ocean/impulse_wars/impulse_wars.c @@ -16,9 +16,9 @@ int main(void) { posix_memalign((void **)&e->observations, sizeof(void *), alignedSize(NUM_DRONES * obsBytes(NUM_DRONES), sizeof(float))); e->rewards = fastCalloc(NUM_DRONES, sizeof(float)); e->actions = fastCalloc(NUM_DRONES * CONTINUOUS_ACTION_SIZE, sizeof(float)); - e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); - e->terminals = fastCalloc(NUM_DRONES, sizeof(uint8_t)); - e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + //e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + e->terminals = fastCalloc(NUM_DRONES, sizeof(float)); + //e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); rayClient *client = createRayClient(); e->client = client; @@ -41,9 +41,9 @@ int main(void) { free(e->observations); fastFree(e->actions); fastFree(e->rewards); - fastFree(e->masks); + //fastFree(e->masks); 
fastFree(e->terminals); - fastFree(e->truncations); + //fastFree(e->truncations); fastFree(e); destroyRayClient(client); #endif diff --git a/ocean/impulse_wars/map.h b/ocean/impulse_wars/map.h index eb5f2c7026..3e3c3f7c50 100644 --- a/ocean/impulse_wars/map.h +++ b/ocean/impulse_wars/map.h @@ -450,8 +450,8 @@ void setupMap(iwEnv *e, const uint8_t mapIdx) { e->mapIdx = mapIdx; e->map = maps[mapIdx]; e->defaultWeapon = weaponInfos[maps[mapIdx]->defaultWeapon]; - if (e->isTraining && randFloat(&e->randState, 0.0f, 1.0f) < 0.25f) { - e->defaultWeapon = weaponInfos[randInt(&e->randState, 0, NUM_WEAPONS - 1)]; + if (e->isTraining && randFloat(&e->rng, 0.0f, 1.0f) < 0.25f) { + e->defaultWeapon = weaponInfos[randInt(&e->rng, 0, NUM_WEAPONS - 1)]; } uint16_t cellIdx = 0; @@ -576,7 +576,13 @@ bool posValidDroneSpawnPoint(const iwEnv *e, const b2Vec2 pos) { return true; } +bool MAPS_INITIALIZED = false; + void initMaps(iwEnv *e) { + if (MAPS_INITIALIZED) { + return; + } + for (uint8_t i = 0; i < NUM_MAPS; i++) { setupMap(e, i); mapEntry *map = maps[i]; @@ -631,6 +637,8 @@ void initMaps(iwEnv *e) { } e->mapIdx = -1; + + MAPS_INITIALIZED = true; } void destroyMaps() { diff --git a/ocean/impulse_wars/render.h b/ocean/impulse_wars/render.h index d4300d5538..0b46c74d98 100644 --- a/ocean/impulse_wars/render.h +++ b/ocean/impulse_wars/render.h @@ -809,7 +809,7 @@ void renderUI(const iwEnv *e, const bool starting) { char *playerType = ""; if (droneControlledByHuman(e, drone->idx)) { playerType = "Human"; - } else if (drone->idx < e->numAgents) { + } else if (drone->idx < e->num_agents) { playerType = "NN"; } else { if (e->sittingDuck) { @@ -1211,7 +1211,7 @@ void renderDroneGuides(iwEnv *e, const droneEntity *drone, const bool ending) { if (!b2VecEqual(drone->lastMove, b2Vec2_zero) && !ending) { const float moveMagnitude = b2Length(drone->lastMove); const float thrusterAngle = RAD2DEG * b2Atan2(-drone->lastMove.y, -drone->lastMove.x); - const float flickerWidth = randFloat(&e->randState, -0.05f, 0.05f); + const float flickerWidth = randFloat(&e->rng, -0.05f, 0.05f); const float thrusterWidth = 2.5f * ((halfDroneRadius * moveMagnitude) + halfDroneRadius + flickerWidth); const b2Vec2 thrusterPos = b2MulAdd(drone->pos, -thrusterWidth / 2.0f, drone->lastMove); const Color thrusterColor = Fade(getDroneColor(drone->idx), 0.9); diff --git a/ocean/impulse_wars/types.h b/ocean/impulse_wars/types.h index 9bad059336..113060ba2e 100644 --- a/ocean/impulse_wars/types.h +++ b/ocean/impulse_wars/types.h @@ -413,7 +413,7 @@ typedef struct debugPoint { typedef struct iwEnv { uint8_t numDrones; - uint8_t numAgents; + uint8_t num_agents; uint8_t numTeams; bool teamsEnabled; bool sittingDuck; @@ -439,15 +439,15 @@ typedef struct iwEnv { uint8_t *observations; float *rewards; float *actions; - uint8_t *masks; - uint8_t *terminals; - uint8_t *truncations; + //uint8_t *masks; + float *terminals; + //uint8_t *truncations; uint8_t frameRate; float deltaTime; uint8_t frameSkip; uint8_t box2dSubSteps; - uint64_t randState; + uint64_t rng; bool needsReset; uint16_t episodeLength; From 498681421c8c64023504b0ec39f96618b1c75c80 Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Tue, 28 Apr 2026 14:42:56 -0400 Subject: [PATCH 3/4] trains without crashing Co-authored-by: Copilot --- build.sh | 27 ++- config/impulse_wars.ini | 130 +------------ .../impulse_wars/.clang-format | 1 - ocean/impulse_wars/binding.c | 72 +++---- ocean/impulse_wars/env.h | 57 +++--- ocean/impulse_wars/helpers.h | 14 -- ocean/impulse_wars/impulse_wars.c | 8 +- 
ocean/impulse_wars/impulse_wars.py | 181 ------------------ ocean/impulse_wars/pyproject.toml | 25 --- ocean/impulse_wars/types.h | 7 +- pufferlib/pufferl.py | 2 +- .../impulse_wars/shaders/gls330/bloom.fs | 2 +- resources/impulse_wars/shaders/gls330/blur.fs | 2 +- src/bindings.cu | 4 +- src/bindings_cpu.cpp | 2 +- src/pufferlib.cu | 2 +- 16 files changed, 89 insertions(+), 447 deletions(-) rename .clang-format => ocean/impulse_wars/.clang-format (99%) delete mode 100644 ocean/impulse_wars/impulse_wars.py delete mode 100644 ocean/impulse_wars/pyproject.toml diff --git a/build.sh b/build.sh index 6cb06a7403..46beafaceb 100755 --- a/build.sh +++ b/build.sh @@ -50,20 +50,23 @@ if [ "$ENV" = "all" ]; then exit 0 fi +STANDALONE_LDFLAGS=(-fuse-ld=lld) +SHARED_LDFLAGS=(-fuse-ld=lld) + # Linux/mac PLATFORM="$(uname -s)" if [ "$PLATFORM" = "Linux" ]; then RAYLIB_NAME='raylib-5.5_linux_amd64' OMP_LIB=-lomp5 SANITIZE_FLAGS=(-fsanitize=address,undefined,bounds,pointer-overflow,leak -fno-omit-frame-pointer) - STANDALONE_LDFLAGS=(-lGL) - SHARED_LDFLAGS=(-Bsymbolic-functions) + STANDALONE_LDFLAGS+=(-lGL) + SHARED_LDFLAGS+=(-Bsymbolic-functions) else RAYLIB_NAME='raylib-5.5_macos' OMP_LIB=-lomp SANITIZE_FLAGS=() - STANDALONE_LDFLAGS=(-framework Cocoa -framework IOKit -framework CoreVideo -framework OpenGL) - SHARED_LDFLAGS=(-framework Cocoa -framework OpenGL -framework IOKit -undefined dynamic_lookup) + STANDALONE_LDFLAGS+=(-framework Cocoa -framework IOKit -framework CoreVideo -framework OpenGL) + SHARED_LDFLAGS+=(-framework Cocoa -framework OpenGL -framework IOKit -undefined dynamic_lookup) fi CLANG_WARN=( @@ -120,9 +123,13 @@ elif [ "$ENV" = "impulse_wars" ]; then BOX2D_URL="https://github.com/capnspacehook/box2d/releases/latest/download" download "$BOX2D_NAME" "$BOX2D_URL/$BOX2D_NAME.tar.gz" INCLUDES+=(-I./$BOX2D_NAME/include -I./$BOX2D_NAME/src) - LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a") - - CLANG_OPT=(-flto -fno-math-errno -march=native) + + if [ -z "$DEBUG" ]; then + CLANG_OPT+=(-flto -fno-math-errno -march=native) + LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a") + else + LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2dd.a") + fi elif [ -d "ocean/$ENV" ]; then SRC_DIR="ocean/$ENV" else @@ -137,9 +144,9 @@ if [ -n "$DEBUG" ] || [ "$MODE" = "local" ]; then NVCC_OPT="-O0 -g" LINK_OPT="-g" else - CLANG_OPT+=(-O2 -DNDEBUG "${CLANG_WARN[@]}") - NVCC_OPT="-O2 --threads 0" - LINK_OPT="-O2" + CLANG_OPT+=(-O3 -DNDEBUG "${CLANG_WARN[@]}") + NVCC_OPT="-O3 --threads 0" + LINK_OPT="-O3" fi if [ "$MODE" = "local" ] || [ "$MODE" = "fast" ]; then FLAGS=( diff --git a/config/impulse_wars.ini b/config/impulse_wars.ini index 772c08306f..a252984ddc 100644 --- a/config/impulse_wars.ini +++ b/config/impulse_wars.ini @@ -1,28 +1,11 @@ [base] env_name = impulse_wars -max_suggestion_cost = 10_800 - -[policy] -hidden_size = 512 -cnn_channels = 64 - -# These must match what's set in env below -continuous = False -num_drones = 2 -is_training = True - -[vec] -num_envs = 4 -#num_workers = 4 -#batch_size = 4 - [env] -num_envs = 1024 num_drones = 2 num_agents = 1 enable_teams = False -sitting_duck = False +sitting_duck = True continuous = False is_training = True @@ -40,114 +23,3 @@ reward_explosion_hit_coef = 0.005 [train] total_timesteps = 1_000_000_000 checkpoint_interval = 250 - -learning_rate = 0.005 - -compile = False -compile_mode = reduce-overhead -compile_fullgraph = False - -[sweep] -downsample = 10 -max_cost = 900 - -[sweep.env.num_envs] -distribution = uniform_pow2 -min = 1 -max = 1024 -mean = 128 -scale = auto - -# reward 
parameters -[sweep.env.reward_win] -distribution = uniform -min = 0.0 -mean = 2.0 -max = 5.0 -scale = auto - -[sweep.env.reward_self_kill] -distribution = uniform -min = -3.0 -mean = -1.0 -max = 0.0 -scale = auto - -[sweep.env.reward_enemy_death] -distribution = uniform -min = 0.0 -mean = 1.0 -max = 3.0 -scale = auto - -[sweep.env.reward_kill] -distribution = uniform -min = 0.0 -mean = 1.0 -max = 3.0 -scale = auto - -[sweep.env.reward_death] -distribution = uniform -min = -1.0 -mean = -0.25 -max = 0.0 -scale = auto - -[sweep.env.reward_energy_emptied] -distribution = uniform -min = -2.0 -mean = -0.75 -max = 0.0 -scale = auto - -[sweep.env.reward_weapon_pickup] -distribution = uniform -min = 0.0 -mean = 0.5 -max = 3.0 -scale = auto - -[sweep.env.reward_shield_break] -distribution = uniform -min = 0.0 -mean = 0.5 -max = 3.0 -scale = auto - -[sweep.env.reward_shot_hit_coef] -distribution = log_normal -min = 0.0005 -mean = 0.005 -max = 0.05 -scale = auto - -[sweep.env.reward_explosion_hit_coef] -distribution = log_normal -min = 0.0005 -mean = 0.005 -max = 0.05 -scale = auto - -# hyperparameters -[sweep.train.total_timesteps] -distribution = log_normal -min = 250_000_000 -max = 1_500_000_000 -mean = 500_000_000 -scale = time - -[sweep.train.batch_size] -distribution = uniform_pow2 -min = 65_536 -max = 1_048_576 -mean = 262_144 -scale = auto - -[sweep.train.horizon] -distribution = uniform_pow2 -min = 64 -max = 256 -mean = 128 -scale = auto - diff --git a/.clang-format b/ocean/impulse_wars/.clang-format similarity index 99% rename from .clang-format rename to ocean/impulse_wars/.clang-format index d9ba19d3de..98f71421ec 100644 --- a/.clang-format +++ b/ocean/impulse_wars/.clang-format @@ -85,7 +85,6 @@ BreakAfterAttributes: Leave BreakAfterJavaFieldAnnotations: false BreakArrays: true BreakBeforeBinaryOperators: None -BreakBeforeClosingBracket: Always BreakBeforeConceptDeclarations: Always BreakBeforeBraces: Custom BreakBeforeInlineASMColon: OnlyMultiline diff --git a/ocean/impulse_wars/binding.c b/ocean/impulse_wars/binding.c index 170421963b..0e4186fbf2 100644 --- a/ocean/impulse_wars/binding.c +++ b/ocean/impulse_wars/binding.c @@ -16,10 +16,10 @@ #define DICTGET(key) dict_get(kwargs, key)->value -void my_init(Env* env, Dict* kwargs) { +void my_init(Env *env, Dict *kwargs) { initEnv( env, - 2, + MAX_DRONES, 1, -1, 0, @@ -48,17 +48,26 @@ void my_init(Env* env, Dict* kwargs) { initMaps(env); } -#define _LOG_BUF_SIZE 128 - -char *droneLog(char *buf, const uint8_t droneIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s", droneIdx, name); - return buf; -} - -char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s_%s", droneIdx, weaponNames[weaponIdx], name); - return buf; -} +#define LOG_DRONE_STATS(log, out, idx, idxStr) \ + dict_set(out, "drone_" idxStr "_returns", log->stats[idx].returns); \ + dict_set(out, "drone_" idxStr "_distance_traveled", log->stats[idx].distanceTraveled); \ + dict_set(out, "drone_" idxStr "_abs_distance_traveled", log->stats[idx].absDistanceTraveled); \ + dict_set(out, "drone_" idxStr "_brake_time", log->stats[idx].brakeTime); \ + dict_set(out, "drone_" idxStr "_total_bursts", log->stats[idx].totalBursts); \ + dict_set(out, "drone_" idxStr "_bursts_hit", log->stats[idx].burstsHit); \ + dict_set(out, "drone_" idxStr "_energy_emptied", log->stats[idx].energyEmptied); \ + dict_set(out, "drone_" idxStr "_shields_broken", log->stats[idx].shieldsBroken); \ + dict_set(out, 
"drone_" idxStr "_own_shield_broken", log->stats[idx].ownShieldBroken); \ + dict_set(out, "drone_" idxStr "_self_kills", log->stats[idx].selfKills); \ + dict_set(out, "drone_" idxStr "_kills", log->stats[idx].kills); \ + dict_set(out, "drone_" idxStr "_unknown_kills", log->stats[idx].unknownKills); \ + dict_set(out, "drone_" idxStr "_wins", log->stats[idx].wins); \ + dict_set(out, "drone_" idxStr "_total_shots_fired", log->stats[idx].totalShotsFired); \ + dict_set(out, "drone_" idxStr "_total_shots_hit", log->stats[idx].totalShotsHit); \ + dict_set(out, "drone_" idxStr "_total_shots_taken", log->stats[idx].totalShotsTaken); \ + dict_set(out, "drone_" idxStr "_total_own_shots_taken", log->stats[idx].totalOwnShotsTaken); \ + dict_set(out, "drone_" idxStr "_total_picked_up", log->stats[idx].totalWeaponsPickedUp); \ + dict_set(out, "drone_" idxStr "_total_shot_distances", log->stats[idx].totalShotDistances) void my_log(Log *log, Dict *out) { dict_set(out, "episode_length", log->length); @@ -67,39 +76,6 @@ void my_log(Log *log, Dict *out) { dict_set(out, "perf", log->stats[0].wins); dict_set(out, "score", log->stats[0].wins); - char buf[_LOG_BUF_SIZE] = {0}; - for (uint8_t i = 0; i < MAX_DRONES; i++) { - dict_set(out, droneLog(buf, i, "returns"), log->stats[i].returns); - dict_set(out, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); - dict_set(out, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); - dict_set(out, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); - dict_set(out, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); - dict_set(out, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); - dict_set(out, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); - dict_set(out, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); - dict_set(out, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); - dict_set(out, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); - dict_set(out, droneLog(buf, i, "kills"), log->stats[i].kills); - dict_set(out, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); - dict_set(out, droneLog(buf, i, "wins"), log->stats[i].wins); - - // useful for debugging weapon balance, but really slows down - // sweeps due to adding a ton of extra logging data - // - // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { - // dict_set(out, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); - // dict_set(out, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); - // dict_set(out, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); - // dict_set(out, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); - // dict_set(out, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); - // dict_set(out, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); - // } - - dict_set(out, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); - dict_set(out, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); - dict_set(out, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); - dict_set(out, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); - dict_set(out, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); - dict_set(out, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); - } + LOG_DRONE_STATS(log, out, 0, "0"); + LOG_DRONE_STATS(log, out, 1, "1"); } 
diff --git a/ocean/impulse_wars/env.h b/ocean/impulse_wars/env.h
index 3fc90be472..2a45003a15 100644
--- a/ocean/impulse_wars/env.h
+++ b/ocean/impulse_wars/env.h
@@ -463,6 +463,7 @@ void computeObs(iwEnv *e) {
 }
 
 void setupEnv(iwEnv *e) {
+    e->isSetup = true;
     e->needsReset = false;
 
     e->stepsLeft = e->totalSteps;
@@ -557,10 +558,11 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui
 
     e->continuousActions = continuousActions;
 
-    //e->truncations = fastCalloc(numDrones, sizeof(uint8_t));
+    // e->truncations = fastCalloc(numDrones, sizeof(uint8_t));
     setEnvFrameRate(e);
 
     e->rng = seed;
+    e->isSetup = false;
     e->needsReset = false;
 
     b2WorldDef worldDef = b2DefaultWorldDef();
@@ -620,8 +622,8 @@ void setRewards(iwEnv *e, float winReward, float selfKillPunishment, float enemy
 void clearEnv(iwEnv *e) {
     // rewards get cleared in stepEnv every step
     // memset(e->masks, 1, e->num_agents * sizeof(uint8_t));
-    memset(e->terminals, 0.0f, e->num_agents * sizeof(uint8_t));
-    //memset(e->truncations, 0x0, e->num_agents * sizeof(uint8_t));
+    memset(e->terminals, 0.0f, e->num_agents * sizeof(float));
+    // memset(e->truncations, 0x0, e->num_agents * sizeof(uint8_t));
 
     e->episodeLength = 0;
     memset(e->stats, 0x0, sizeof(e->stats));
@@ -666,30 +668,33 @@ void clearEnv(iwEnv *e) {
 }
 
 void destroyEnv(iwEnv *e) {
-    clearEnv(e);
+    if (e->isSetup) {
+        clearEnv(e);
 
-    for (uint8_t i = 0; i < NUM_MAPS; i++) {
-        pathingInfo *info = &e->mapPathing[i];
-        fastFree(info->paths);
-        fastFree(info->pathBuffer);
-    }
-    fastFree(e->mapPathing);
+        for (size_t i = 0; i < cc_array_size(e->walls); i++) {
+            wallEntity *wall = safe_array_get_at(e->walls, i);
+            destroyWall(e, wall, false);
+        }
 
-    for (size_t i = 0; i < cc_array_size(e->walls); i++) {
-        wallEntity *wall = safe_array_get_at(e->walls, i);
-        destroyWall(e, wall, false);
-    }
+        for (size_t i = 0; i < cc_array_size(e->cells); i++) {
+            mapCell *cell = safe_array_get_at(e->cells, i);
+            fastFree(cell);
+        }
 
-    for (size_t i = 0; i < cc_array_size(e->cells); i++) {
-        mapCell *cell = safe_array_get_at(e->cells, i);
-        fastFree(cell);
-    }
+        for (size_t i = 0; i < cc_array_size(e->entities); i++) {
+            entity *ent = safe_array_get_at(e->entities, i);
+            fastFree(ent->id);
+            fastFree(ent);
+        }
 
-    for (size_t i = 0; i < cc_array_size(e->entities); i++) {
-        entity *ent = safe_array_get_at(e->entities, i);
-        fastFree(ent->id);
-        fastFree(ent);
+        for (uint8_t i = 0; i < NUM_MAPS; i++) {
+            pathingInfo *info = &e->mapPathing[i];
+            fastFree(info->paths);
+            fastFree(info->pathBuffer);
+        }
+        fastFree(e->mapPathing);
     }
+
     b2DestroyIdPool(&e->idPool);
 
     cc_array_destroy(e->entities);
@@ -711,7 +716,9 @@ void destroyEnv(iwEnv *e) {
 }
 
 void resetEnv(iwEnv *e) {
-    clearEnv(e);
+    if (e->isSetup) {
+        clearEnv(e);
+    }
     setupEnv(e);
 }
@@ -821,7 +828,7 @@ agentActions _computeActions(iwEnv *e, droneEntity *drone, const agentActions *m
     agentActions actions = {0};
 
     if (manualActions == NULL) {
-        float (*envActions)[7] = (float(*)[7])e->actions;
+        float (*envActions)[7] = (float (*)[7])e->actions;
 
         uint8_t move = envActions[drone->idx][0];
         // 0 is no-op for both move and aim
@@ -1247,7 +1254,7 @@ void stepEnv(iwEnv *e) {
         // }
 
         DEBUG_LOG("terminating episode");
-        memset(e->terminals, 1.0f,  e->num_agents * sizeof(float));
+        for (uint8_t a = 0; a < e->num_agents; a++) { e->terminals[a] = 1.0f; } // memset writes a byte pattern and can't store 1.0f into floats
 
         Log log = {0};
         log.length = e->episodeLength;
diff --git a/ocean/impulse_wars/helpers.h b/ocean/impulse_wars/helpers.h
index 7fb6a8305a..4458ffae71 100644
--- a/ocean/impulse_wars/helpers.h
+++ b/ocean/impulse_wars/helpers.h
@@ -109,26 +109,12 @@ ASSERTF(fabs(vec.y - norm.y) < 0.000001f, "vec: %f, %f norm: %f, %f", vec.x, vec.y, norm.x, norm.y); \ } while (0) -// use malloc when debugging so the address sanitizer can find issues with -// heap memory, use dlmalloc in release mode for performance; emscripten -// uses dlmalloc by default so no need to change anything here; dlmalloc -// sometimes won't compile on macOS so just use malloc and friends -#if !defined(NDEBUG) || defined(__EMSCRIPTEN__) || defined(__APPLE__) #define fastMalloc(size) malloc(size) #define fastMallocFn malloc #define fastCalloc(nmemb, size) calloc(nmemb, size) #define fastCallocFn calloc #define fastFree(ptr) free(ptr) #define fastFreeFn free -#else -#include "dlmalloc.h" -#define fastMalloc(size) dlmalloc(size) -#define fastMallocFn dlmalloc -#define fastCalloc(nmemb, size) dlcalloc(nmemb, size) -#define fastCallocFn dlcalloc -#define fastFree(ptr) dlfree(ptr) -#define fastFreeFn dlfree -#endif static inline void create_array(CC_Array **array, size_t initialCap) { CC_ArrayConf conf; diff --git a/ocean/impulse_wars/impulse_wars.c b/ocean/impulse_wars/impulse_wars.c index 26db5ae5a6..ec8e82cab3 100644 --- a/ocean/impulse_wars/impulse_wars.c +++ b/ocean/impulse_wars/impulse_wars.c @@ -16,9 +16,9 @@ int main(void) { posix_memalign((void **)&e->observations, sizeof(void *), alignedSize(NUM_DRONES * obsBytes(NUM_DRONES), sizeof(float))); e->rewards = fastCalloc(NUM_DRONES, sizeof(float)); e->actions = fastCalloc(NUM_DRONES * CONTINUOUS_ACTION_SIZE, sizeof(float)); - //e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + // e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); e->terminals = fastCalloc(NUM_DRONES, sizeof(float)); - //e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + // e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); rayClient *client = createRayClient(); e->client = client; @@ -41,9 +41,9 @@ int main(void) { free(e->observations); fastFree(e->actions); fastFree(e->rewards); - //fastFree(e->masks); + // fastFree(e->masks); fastFree(e->terminals); - //fastFree(e->truncations); + // fastFree(e->truncations); fastFree(e); destroyRayClient(client); #endif diff --git a/ocean/impulse_wars/impulse_wars.py b/ocean/impulse_wars/impulse_wars.py deleted file mode 100644 index 6fc2f5d27e..0000000000 --- a/ocean/impulse_wars/impulse_wars.py +++ /dev/null @@ -1,181 +0,0 @@ -from types import SimpleNamespace - -import gymnasium -import numpy as np - -import pufferlib -from pufferlib.ocean.impulse_wars import binding - - -discMoveToContMove = np.array([ - [1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0, 0.707107, 0.0], - [0.0, 0.707107, 1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0], -], dtype=np.float32) -discAimToContAim = np.array([ - [1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0, 0.382683, 0.707107, 0.92388, 0.0], - [0.0, 0.382683, 0.707107, 0.92388, 1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0], -], dtype=np.float32) - - -class ImpulseWars(pufferlib.PufferEnv): - def __init__( - self, - num_envs: int = 1, - num_drones: int = 2, - num_agents: int = 1, - enable_teams: bool = False, - sitting_duck: bool = False, - continuous: bool = False, - is_training: bool = True, - human_control: bool = False, - reward_win: float = 2.0, - reward_self_kill: float = -1.0, - reward_enemy_death: float = 1.0, - reward_enemy_kill: float = 1.0, - reward_death: float = -0.25, - reward_energy_emptied: 
float = -0.75, - reward_weapon_pickup: float = 0.5, - reward_shield_break: float = 0.5, - reward_shot_hit_coef: float = 0.005, - reward_explosion_hit_coef: float = 0.005, - seed: int = 0, - render: bool = False, - report_interval: int = 64, - buf = None, - ): - self.obsInfo = SimpleNamespace(**binding.get_consts(num_drones)) - - if num_envs <= 0: - raise ValueError("num_envs must be greater than 0") - if num_drones > self.obsInfo.maxDrones or num_drones <= 0: - raise ValueError(f"num_drones must greater than 0 and less than or equal to {self.obsInfo.maxDrones}") - if num_agents > num_drones or num_agents <= 0: - raise ValueError("num_agents must greater than 0 and less than or equal to num_drones") - if enable_teams and (num_drones % 2 != 0 or num_drones <= 2): - raise ValueError("enable_teams is only supported for even numbers of drones greater than 2") - - self.numDrones = num_drones - self.continuous = continuous - - self.num_agents = num_agents * num_envs - self.tick = 0 - - # map observations are bit packed to save space, and scalar - # observations need to be floats - self.single_observation_space = gymnasium.spaces.Box( - low=0, high=255, shape=(self.obsInfo.obsBytes,), dtype=np.uint8 - ) - - if self.continuous: - # action space is actually bounded by (-1, 1) but pufferlib - # will check that actions are within the bounds of the action - # space before actions get to the env, and we ensure the actions - # are bounded there; so set bounds to (-inf, inf) here so - # action bounds checks pass - self.single_action_space = gymnasium.spaces.Box( - low=float("-inf"), high=float("inf"), shape=(self.obsInfo.contActionsSize,), dtype=np.float32 - ) - else: - self.single_action_space = gymnasium.spaces.MultiDiscrete( - [ - 9, # move, noop + 8 directions - 17, # aim, noop + 16 directions - 2, # shoot or not - 2, # brake or not - 2, # burst - ] - ) - - self.report_interval = report_interval - self.render_mode = "human" if render else None - - super().__init__(buf) - if not self.continuous: - self.actions = np.zeros((self.num_agents, self.obsInfo.contActionsSize), dtype=np.float32) - - self.c_envs = binding.vec_init( - self.observations, - self.actions, - self.rewards, - self.terminals, - self.truncations, - num_envs, - seed, - num_drones=num_drones, - num_agents=num_agents, - map_idx=-1, - enable_teams=enable_teams, - sitting_duck=sitting_duck, - is_training=is_training, - continuous=continuous, - reward_win=reward_win, - reward_self_kill=reward_self_kill, - reward_enemy_death=reward_enemy_death, - reward_enemy_kill=reward_enemy_kill, - reward_death=reward_death, - reward_energy_emptied=reward_energy_emptied, - reward_weapon_pickup=reward_weapon_pickup, - reward_shield_break=reward_shield_break, - reward_shot_hit_coef=reward_shot_hit_coef, - reward_explosion_hit_coef=reward_explosion_hit_coef, - ) - - binding.shared(self.c_envs) - - def reset(self, seed=None): - self.tick = 0 - if seed is None: - binding.vec_reset(self.c_envs, 0) - else: - binding.vec_reset(self.c_envs, seed) - return self.observations, [] - - def step(self, actions): - if self.continuous: - self.actions[:] = actions - else: - contMove = discMoveToContMove[:, actions[:, 0]].T - contAim = discAimToContAim[:, actions[:, 1]].T - contRest = actions[:, 2:].astype(np.float32) - self.actions[:] = np.concatenate([contMove, contAim, contRest], axis=1) - - self.tick += 1 - binding.vec_step(self.c_envs) - - infos = [] - if self.tick % self.report_interval == 0: - infos.append(binding.vec_log(self.c_envs)) - - return self.observations, 
self.rewards, self.terminals, self.truncations, infos - - def render(self): - binding.vec_render(self.c_envs, 0) - - def close(self): - binding.vec_close(self.c_envs) - - -def testPerf(timeout, actionCache, numEnvs): - env = ImpulseWars(numEnvs) - - import time - - np.random.seed(int(time.time())) - actions = np.random.uniform(-1, 1, (actionCache, env.num_agents, 7)) - - tick = 0 - start = time.time() - while time.time() - start < timeout: - action = actions[tick % actionCache] - env.step(action) - tick += 1 - - sps = numEnvs * (tick / (time.time() - start)) - print(f"SPS: {sps:,}") - print(f"Steps: {numEnvs * tick}") - - env.close() - - -if __name__ == "__main__": - testPerf(timeout=5, actionCache=1024, numEnvs=1) diff --git a/ocean/impulse_wars/pyproject.toml b/ocean/impulse_wars/pyproject.toml deleted file mode 100644 index df67b2bd17..0000000000 --- a/ocean/impulse_wars/pyproject.toml +++ /dev/null @@ -1,25 +0,0 @@ -[build-system] -requires = ["scikit-build-core>=0.10", "autopxd2>=2.5.0", "cython>=3.0.11"] -build-backend = "scikit_build_core.build" - -[project] -name = "binding" -version = "1.0.0" -requires-python = ">=3.11" - -[tool.scikit-build] -minimum-version = "build-system.requires" -cmake.build-type = "Release" -build.verbose = true -logging.level = "INFO" - -[tool.scikit-build.cmake.define] -BUILD_PYTHON_MODULE = true -CMAKE_C_COMPILER = "clang-20" - -[tool.ruff] -line-length = 110 - -[tool.ruff.lint] -# skip "Module level import not at top of file" -ignore = ["E402"] diff --git a/ocean/impulse_wars/types.h b/ocean/impulse_wars/types.h index 113060ba2e..682b280e72 100644 --- a/ocean/impulse_wars/types.h +++ b/ocean/impulse_wars/types.h @@ -10,7 +10,7 @@ #include "settings.h" -#define _MAX_DRONES 4 +#define _MAX_DRONES 2 const uint8_t NUM_WALL_TYPES = 3; @@ -439,15 +439,16 @@ typedef struct iwEnv { uint8_t *observations; float *rewards; float *actions; - //uint8_t *masks; + // uint8_t *masks; float *terminals; - //uint8_t *truncations; + // uint8_t *truncations; uint8_t frameRate; float deltaTime; uint8_t frameSkip; uint8_t box2dSubSteps; uint64_t rng; + bool isSetup; bool needsReset; uint16_t episodeLength; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 8fc0c03a89..19c54791aa 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -146,7 +146,7 @@ def print_dashboard(args, model_size, flat_logs, clear=False, idx=[0], u = left if i % 2 == 0 else right u.add_row(f'{b2}{k[4:]}', f'{b2}{v:.3f}') i += 1 - if i == 30: + if i == 60: break if clear: diff --git a/resources/impulse_wars/shaders/gls330/bloom.fs b/resources/impulse_wars/shaders/gls330/bloom.fs index 246acb6af8..bcd2fa189d 100644 --- a/resources/impulse_wars/shaders/gls330/bloom.fs +++ b/resources/impulse_wars/shaders/gls330/bloom.fs @@ -23,7 +23,7 @@ #define BLOOM_ADDITIVE 1 #define BLOOM_SOFT_LIGHT 2 -noperspective in vec2 fragTexCoord; +in vec2 fragTexCoord; uniform sampler2D uTexColor; uniform sampler2D uTexBloomBlur; diff --git a/resources/impulse_wars/shaders/gls330/blur.fs b/resources/impulse_wars/shaders/gls330/blur.fs index f853a495e1..880d2ad2cc 100644 --- a/resources/impulse_wars/shaders/gls330/blur.fs +++ b/resources/impulse_wars/shaders/gls330/blur.fs @@ -22,7 +22,7 @@ #version 330 core -noperspective in vec2 fragTexCoord; +in vec2 fragTexCoord; uniform sampler2D uTexture; uniform vec2 uTexelDir; diff --git a/src/bindings.cu b/src/bindings.cu index 4469cb512c..4f413e8983 100644 --- a/src/bindings.cu +++ b/src/bindings.cu @@ -106,7 +106,7 @@ pybind11::dict puf_eval_log(pybind11::object 
pufferl_obj) { pufferl.last_log_step = pufferl.global_step; pybind11::dict env_dict; - Dict* env_out = create_dict(32); + Dict* env_out = create_dict(64); static_vec_eval_log(pufferl.vec, env_out); for (int i = 0; i < env_out->size; i++) { env_dict[env_out->items[i].key] = env_out->items[i].value; @@ -318,7 +318,7 @@ void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) { } py::dict vec_log(VecEnv& ve) { - Dict* out = create_dict(32); + Dict* out = create_dict(64); static_vec_log(ve.vec, out); py::dict result; for (int i = 0; i < out->size; i++) { diff --git a/src/bindings_cpu.cpp b/src/bindings_cpu.cpp index 5ba4dc81e5..a4e0b7633c 100644 --- a/src/bindings_cpu.cpp +++ b/src/bindings_cpu.cpp @@ -141,7 +141,7 @@ static void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) { } static py::dict vec_log(VecEnv& ve) { - Dict* out = create_dict(32); + Dict* out = create_dict(64); static_vec_log(ve.vec, out); py::dict result; for (int i = 0; i < out->size; i++) diff --git a/src/pufferlib.cu b/src/pufferlib.cu index 3a3e6ee00e..c74d3553ee 100644 --- a/src/pufferlib.cu +++ b/src/pufferlib.cu @@ -330,7 +330,7 @@ typedef struct { } PuffeRL; Dict* log_environments_impl(PuffeRL& pufferl) { - Dict* out = create_dict(32); + Dict* out = create_dict(64); static_vec_log(pufferl.vec, out); return out; } From 3c9ae6fc77aaf520f35793d7ae1dee6e4661ce9f Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Tue, 28 Apr 2026 15:49:06 -0400 Subject: [PATCH 4/4] removed now unused dlmalloc dep --- vendor/dlmalloc.h | 6264 --------------------------------------------- 1 file changed, 6264 deletions(-) delete mode 100644 vendor/dlmalloc.h diff --git a/vendor/dlmalloc.h b/vendor/dlmalloc.h deleted file mode 100644 index 4ef7c9cfd5..0000000000 --- a/vendor/dlmalloc.h +++ /dev/null @@ -1,6264 +0,0 @@ -/* - Default header file for malloc-2.7.2, written by Doug Lea - and released to the public domain. Use, modify, and redistribute - this code without permission or acknowledgement in any way you wish. - Send questions, comments, complaints, performance data, etc to - dl@cs.oswego.edu. - - last update: Sun Feb 25 18:38:11 2001 Doug Lea (dl at gee) - - This header is for ANSI C/C++ only. You can set either of - the following #defines before including: - - * If USE_DL_PREFIX is defined, it is assumed that malloc.c - was also compiled with this option, so all routines - have names starting with "dl". - - * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this - file will be #included AFTER . This is needed only if - your system defines a struct mallinfo that is incompatible with the - standard one declared here. Otherwise, you can include this file - INSTEAD of your system system . At least on ANSI, all - declarations should be compatible with system versions -*/ - -#ifndef MALLOC_270_H -#define MALLOC_270_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include /* for size_t */ - -#define USE_DL_PREFIX - -/* - malloc(size_t n) - Returns a pointer to a newly allocated chunk of at least n bytes, or - null if no space is available. Additionally, on failure, errno is - set to ENOMEM on ANSI C systems. - - If n is zero, malloc returns a minimum-sized chunk. The minimum size - is 16 bytes on most 32bit systems, and either 24 or 32 bytes on - 64bit systems, depending on internal size and alignment restrictions. - - On most systems, size_t is an unsigned type. Calls with values of n - that appear "negative" when signed are interpreted as requests for - huge amounts of space, which will most often fail. 
- - The maximum allowed value of n differs across systems, but is in all - cases less (typically by 8K) than the maximum representable value of - a size_t. Requests greater than this value result in failure. -*/ - -#ifndef USE_DL_PREFIX -void *malloc(size_t); -#else -void *dlmalloc(size_t); -#endif - -/* - free(void* p) - Releases the chunk of memory pointed to by p, that had been previously - allocated using malloc or a related routine such as realloc. - It has no effect if p is null. It can have arbitrary (and bad!) - effects if p has already been freed or was not obtained via malloc. - - Unless disabled using mallopt, freeing very large spaces will, - when possible, automatically trigger operations that give - back unused memory to the system, thus reducing program footprint. -*/ -#ifndef USE_DL_PREFIX -void free(void *); -#else -void dlfree(void *); -#endif - -/* - calloc(size_t n_elements, size_t element_size); - Returns a pointer to n_elements * element_size bytes, with all locations - set to zero. -*/ -#ifndef USE_DL_PREFIX -void *calloc(size_t, size_t); -#else -void *dlcalloc(size_t, size_t); -#endif - -/* - realloc(void* p, size_t n) - Returns a pointer to a chunk of size n that contains the same data - as does chunk p up to the minimum of (n, p's size) bytes. - - The returned pointer may or may not be the same as p. The algorithm - prefers extending p when possible, otherwise it employs the - equivalent of a malloc-copy-free sequence. - - If p is null, realloc is equivalent to malloc. - - If space is not available, realloc returns null, errno is set (if on - ANSI) and p is NOT freed. - - if n is for fewer bytes than already held by p, the newly unused - space is lopped off and freed if possible. Unless the #define - REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of - zero (re)allocates a minimum-sized chunk. - - Large chunks that were internally obtained via mmap will always - be reallocated using malloc-copy-free sequences unless - the system supports MREMAP (currently only linux). - - The old unix realloc convention of allowing the last-free'd chunk - to be used as an argument to realloc is not supported. -*/ - -#ifndef USE_DL_PREFIX -void *realloc(void *, size_t); -#else -void *dlrealloc(void *, size_t); -#endif - -/* - memalign(size_t alignment, size_t n); - Returns a pointer to a newly allocated chunk of n bytes, aligned - in accord with the alignment argument. - - The alignment argument should be a power of two. If the argument is - not a power of two, the nearest greater power is used. - 8-byte alignment is guaranteed by normal malloc calls, so don't - bother calling memalign with an argument of 8 or less. - - Overreliance on memalign is a sure way to fragment space. -*/ - -#ifndef USE_DL_PREFIX -void *memalign(size_t, size_t); -#else -void *dlmemalign(size_t, size_t); -#endif - -/* - valloc(size_t n); - Allocates a page-aligned chunk of at least n bytes. - Equivalent to memalign(pagesize, n), where pagesize is the page - size of the system. If the pagesize is unknown, 4096 is used. -*/ - -#ifndef USE_DL_PREFIX -void *valloc(size_t); -#else -void *dlvalloc(size_t); -#endif - -/* - independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); - - independent_calloc is similar to calloc, but instead of returning a - single cleared space, it returns an array of pointers to n_elements - independent elements, each of which can hold contents of size - elem_size. Each element starts out cleared, and can be - independently freed, realloc'ed etc. 
The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null, which is - probably the most typical usage). If it is null, the returned array - is itself dynamically allocated and should also be freed when it is - no longer needed. Otherwise, the chunks array must be of at least - n_elements in length. It is filled in with the pointers to the - chunks. - - In either case, independent_calloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and "chunks" - is null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use regular calloc and assign pointers into this - space to represent elements. (In this case though, you cannot - independently free elements.) - - independent_calloc simplifies and speeds up implementations of many - kinds of pools. It may also be useful when constructing large data - structures that initially have a fixed number of fixed-sized nodes, - but the number is not known at compile time, and some of the nodes - may later need to be freed. For example: - - struct Node { int item; struct Node* next; }; - - struct Node* build_list() { - struct Node** pool; - int n = read_number_of_nodes_needed(); - if (n <= 0) return 0; - pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); - if (pool == 0) return 0; // failure - // organize into a linked list... - struct Node* first = pool[0]; - for (i = 0; i < n-1; ++i) - pool[i]->next = pool[i+1]; - free(pool); // Can now free the array (or not, if it is needed later) - return first; - } -*/ - -#ifndef USE_DL_PREFIX -void **independent_calloc(size_t, size_t, void **); -#else -void **dlindependent_calloc(size_t, size_t, void **); -#endif - -/* - independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); - - independent_comalloc allocates, all at once, a set of n_elements - chunks with sizes indicated in the "sizes" array. It returns - an array of pointers to these elements, each of which can be - independently freed, realloc'ed etc. The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null). If it is null - the returned array is itself dynamically allocated and should also - be freed when it is no longer needed. Otherwise, the chunks array - must be of at least n_elements in length. It is filled in with the - pointers to the chunks. - - In either case, independent_comalloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and chunks is - null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use a single regular malloc, and assign pointers at - particular offsets in the aggregate space. (In this case though, you - cannot independently free elements.) 
- - independent_comallac differs from independent_calloc in that each - element may have a different size, and also that it does not - automatically clear elements. - - independent_comalloc can be used to speed up allocation in cases - where several structs or objects must always be allocated at the - same time. For example: - - struct Head { ... } - struct Foot { ... } - - void send_message(char* msg) { - int msglen = strlen(msg); - size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; - void* chunks[3]; - if (independent_comalloc(3, sizes, chunks) == 0) - die(); - struct Head* head = (struct Head*)(chunks[0]); - char* body = (char*)(chunks[1]); - struct Foot* foot = (struct Foot*)(chunks[2]); - // ... - } - - In general though, independent_comalloc is worth using only for - larger values of n_elements. For small values, you probably won't - detect enough difference from series of malloc calls to bother. - - Overuse of independent_comalloc can increase overall memory usage, - since it cannot reuse existing noncontiguous small chunks that - might be available for some of the elements. -*/ - -#ifndef USE_DL_PREFIX -void **independent_comalloc(size_t, size_t *, void **); -#else -void **dlindependent_comalloc(size_t, size_t *, void **); -#endif - -/* - pvalloc(size_t n); - Equivalent to valloc(minimum-page-that-holds(n)), that is, - round up n to nearest pagesize. - */ - -#ifndef USE_DL_PREFIX -void *pvalloc(size_t); -#else -void *dlpvalloc(size_t); -#endif - -/* - cfree(void* p); - Equivalent to free(p). - - cfree is needed/defined on some systems that pair it with calloc, - for odd historical reasons (such as: cfree is used in example - code in the first edition of K&R). -*/ - -#ifndef USE_DL_PREFIX -void cfree(void *); -#else -void dlcfree(void *); -#endif - -/* - malloc_trim(size_t pad); - - If possible, gives memory back to the system (via negative - arguments to sbrk) if there is unused memory at the `high' end of - the malloc pool. You can call this after freeing large blocks of - memory to potentially reduce the system-level memory requirements - of a program. However, it cannot guarantee to reduce memory. Under - some allocation patterns, some large free blocks of memory will be - locked between two used chunks, so they cannot be given back to - the system. - - The `pad' argument to malloc_trim represents the amount of free - trailing space to leave untrimmed. If this argument is zero, - only the minimum amount of memory to maintain internal data - structures will be left (one page or less). Non-zero arguments - can be supplied to maintain enough trailing space to service - future expected allocations without having to re-obtain memory - from the system. - - Malloc_trim returns 1 if it actually released any memory, else 0. - On systems that do not support "negative sbrks", it will always - return 0. -*/ - -#ifndef USE_DL_PREFIX -int malloc_trim(size_t); -#else -int dlmalloc_trim(size_t); -#endif - -/* - malloc_usable_size(void* p); - - Returns the number of bytes you can actually use in an allocated - chunk, which may be more than you requested (although often not) due - to alignment and minimum size constraints. You can use this many - bytes without worrying about overwriting other allocated - objects. This is not a particularly great programming practice. 
But - malloc_usable_size can be more useful in debugging and assertions, - for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); -*/ - -#ifndef USE_DL_PREFIX -size_t malloc_usable_size(void *); -#else -size_t dlmalloc_usable_size(void *); -#endif - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. -*/ - -#ifndef USE_DL_PREFIX -void malloc_stats(void); -#else -void dlmalloc_stats(void); -#endif - -/* - mallinfo() - Returns (by copy) a struct containing various summary statistics: - - arena: current total non-mmapped bytes allocated from system - ordblks: the number of free chunks - smblks: the number of fastbin blocks (i.e., small chunks that - have been freed but not use resused or consolidated) - hblks: current number of mmapped regions - hblkhd: total bytes held in mmapped regions - usmblks: the maximum total allocated space. This will be greater - than current total if trimming has occurred. - fsmblks: total bytes held in fastbin blocks - uordblks: current total allocated space (normal or mmapped) - fordblks: total free space - keepcost: the maximum number of bytes that could ideally be released - back to system via malloc_trim. ("ideally" means that - it ignores page restrictions etc.) - - The names of some of these fields don't bear much relation with - their contents because this struct was defined as standard in - SVID/XPG so reflects the malloc implementation that was then used - in SystemV Unix. - - The original SVID version of this struct, defined on most systems - with mallinfo, declares all fields as ints. But some others define - as unsigned long. If your system defines the fields using a type of - different width than listed here, you should #include your system - version before including this file. The struct declaration is - suppressed if _MALLOC_H is defined (which is done in most system - malloc.h files). You can also suppress it by defining - HAVE_USR_INCLUDE_MALLOC_H. - - Because these fields are ints, but internal bookkeeping is done with - unsigned longs, the reported values may appear as negative, and may - wrap around zero and thus be inaccurate. -*/ - -#ifndef HAVE_USR_INCLUDE_MALLOC_H -#ifndef _MALLOC_H -struct mallinfo { - int arena; - int ordblks; - int smblks; - int hblks; - int hblkhd; - int usmblks; - int fsmblks; - int uordblks; - int fordblks; - int keepcost; -}; -#endif -#endif - -#ifndef USE_DL_PREFIX -struct mallinfo mallinfo(void); -#else -struct mallinfo mallinfo(void); -#endif - -/* - mallopt(int parameter_number, int parameter_value) - Sets tunable parameters The format is to provide a - (parameter-number, parameter-value) pair. 
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. SVID/XPG defines four standard param numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports four
- other options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol            param #  default    allowed param values
- M_MXFAST          1        64         0-80 (0 disables fastbins)
- M_TRIM_THRESHOLD  -1       256*1024   any (-1U disables trimming)
- M_TOP_PAD         -2       0          any
- M_MMAP_THRESHOLD  -3       256*1024   any (or 0 if no MMAP support)
- M_MMAP_MAX        -4       65536      any (0 disables use of mmap)
-*/
-
-#ifndef USE_DL_PREFIX
-int mallopt(int, int);
-#else
-int dlmallopt(int, int);
-#endif
-
-/* Descriptions of tuning options */
-
-/*
- M_MXFAST is the maximum request size used for "fastbins", special bins
- that hold returned chunks without consolidating their spaces. This
- enables future requests for chunks of the same size to be handled
- very quickly, but can increase fragmentation, and thus increase the
- overall memory footprint of a program.
-
- This malloc manages fastbins very conservatively yet still
- efficiently, so fragmentation is rarely a problem for values less
- than or equal to the default. The maximum supported value of MXFAST
- is 80. You wouldn't want it any higher than this anyway. Fastbins
- are designed especially for use with many small structs, objects or
- strings -- the default handles structs/objects/arrays with sizes up
- to 16 4byte fields, or small strings representing words, tokens,
- etc. Using fastbins for larger objects normally worsens
- fragmentation without improving speed.
-
- You can reduce M_MXFAST to 0 to disable all use of fastbins. This
- causes the malloc algorithm to be a closer approximation of
- fifo-best-fit in all cases, not just for larger requests, but will
- generally cause it to be slower.
-*/
-
-#ifndef M_MXFAST
-#define M_MXFAST 1
-#endif
-
-/*
- M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
- to keep before releasing via malloc_trim in free().
-
- Automatic trimming is mainly useful in long-lived programs.
- Because trimming via sbrk can be slow on some systems, and can
- sometimes be wasteful (in cases where programs immediately
- afterward allocate more large chunks) the value should be high
- enough so that your overall system performance would improve by
- releasing this much memory.
-
- The trim threshold and the mmap control parameters (see below)
- can be traded off with one another. Trimming and mmapping are
- two different ways of releasing unused memory back to the
- system. Between these two, it is often possible to keep
- system-level demands of a long-lived program down to a bare
- minimum. For example, in one test suite of sessions measuring
- the XF86 X server on Linux, using a trim threshold of 128K and a
- mmap threshold of 192K led to near-minimal long term resource
- consumption.
-
- If you are using this malloc in a long-lived program, it should
- pay to experiment with these values. As a rough guide, you
- might set to a value close to the average size of a process
- (program) running on your system. Releasing this much memory
- would allow such a process to run in memory.
- Generally, it's worth it to tune for trimming rather than memory
- mapping when a program undergoes phases where several large chunks
- are allocated and released in ways that can reuse each other's
- storage, perhaps mixed with phases where there are no such
- chunks at all. And in well-behaved long-lived programs,
- controlling release of large blocks via trimming versus mapping
- is usually faster.
-
- However, in most programs, these parameters serve mainly as
- protection against the system-level effects of carrying around
- massive amounts of unneeded memory. Since frequent calls to
- sbrk, mmap, and munmap otherwise degrade performance, the default
- parameters are set to relatively high values that serve only as
- safeguards.
-
- The trim value must be greater than page size to have any useful
- effect. To disable trimming completely, you can set it to
- (unsigned long)(-1).
-
- Trim settings interact with fastbin (MXFAST) settings: Unless
- compiled with TRIM_FASTBINS defined, automatic trimming never takes
- place upon freeing a chunk with size less than or equal to
- MXFAST. Trimming is instead delayed until subsequent freeing of
- larger chunks. However, you can still force an attempted trim by
- calling malloc_trim.
-
- Also, trimming is not generally possible in cases where
- the main arena is obtained via mmap.
-
- Note that the trick some people use of mallocing a huge space and
- then freeing it at program startup, in an attempt to reserve system
- memory, doesn't have the intended effect under automatic trimming,
- since that memory will immediately be returned to the system.
-*/
-
-#define M_TRIM_THRESHOLD -1
-
-/*
- M_TOP_PAD is the amount of extra `padding' space to allocate or
- retain whenever sbrk is called. It is used in two ways internally:
-
- * When sbrk is called to extend the top of the arena to satisfy
-   a new malloc request, this much padding is added to the sbrk
-   request.
-
- * When malloc_trim is called automatically from free(),
-   it is used as the `pad' argument.
-
- In both cases, the actual amount of padding is rounded
- so that the end of the arena is always a system page boundary.
-
- The main reason for using padding is to avoid calling sbrk so
- often. Having even a small pad greatly reduces the likelihood
- that nearly every malloc request during program start-up (or
- after trimming) will invoke sbrk, which needlessly wastes
- time.
-
- Automatic rounding-up to page-size units is normally sufficient
- to avoid measurable overhead, so the default is 0. However, in
- systems where sbrk is relatively slow, it can pay to increase
- this value, at the expense of carrying around more memory than
- the program needs.
-*/
-
-#define M_TOP_PAD -2
-
-/*
- M_MMAP_THRESHOLD is the request size threshold for using mmap()
- to service a request. Requests of at least this size that cannot
- be allocated using already-existing space will be serviced via mmap.
- (If enough normal freed space already exists it is used instead.)
-
- Using mmap segregates relatively large chunks of memory so that
- they can be individually obtained and released from the host
- system. A request serviced through mmap is never reused by any
- other request (at least not directly; the system may just so
- happen to remap successive requests to the same locations).
-
- Segregating space in this way has the benefits that:
-
-  1. Mmapped space can ALWAYS be individually released back
-     to the system, which helps keep the system level memory
-     demands of a long-lived program low.
-  2.
Mapped memory can never become `locked' between - other chunks, as can happen with normally allocated chunks, which - means that even trimming via malloc_trim would not release them. - 3. On some systems with "holes" in address spaces, mmap can obtain - memory that sbrk cannot. - - However, it has the disadvantages that: - - 1. The space cannot be reclaimed, consolidated, and then - used to service later requests, as happens with normal chunks. - 2. It can lead to more wastage because of mmap page alignment - requirements - 3. It causes malloc performance to be more dependent on host - system memory management support routines. - - The advantages of mmap nearly always outweigh disadvantages for - "large" chunks, but the value of "large" varies across systems. The - default is an empirically derived value that works well in most - systems. -*/ - -#define M_MMAP_THRESHOLD -3 - -/* - M_MMAP_MAX is the maximum number of requests to simultaneously - service using mmap. This parameter exists because - some systems have a limited number of internal tables for - use by mmap, and using more than a few of them may degrade - performance. - - The default is set to a value that serves only as a safeguard. - Setting to 0 disables use of mmap for servicing large requests. If - mmap is not supported on a system, the default value is 0, and - attempts to set it to non-zero values in mallopt will fail. -*/ - -#define M_MMAP_MAX -4 - -/* Unused SVID2/XPG mallopt options, listed for completeness */ - -#ifndef M_NBLKS -#define M_NLBLKS 2 /* UNUSED in this malloc */ -#endif -#ifndef M_GRAIN -#define M_GRAIN 3 /* UNUSED in this malloc */ -#endif -#ifndef M_KEEP -#define M_KEEP 4 /* UNUSED in this malloc */ -#endif - -/* - Some malloc.h's declare alloca, even though it is not part of malloc. -*/ - -#ifndef _ALLOCA_H -extern void *alloca(size_t); -#endif - -/* - This is a version (aka dlmalloc) of malloc/free/realloc written by - Doug Lea and released to the public domain. Use, modify, and - redistribute this code without permission or acknowledgement in any - way you wish. Send questions, comments, complaints, performance - data, etc to dl@cs.oswego.edu - -* VERSION 2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) - - Note: There may be an updated version of this malloc obtainable at - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - Check before installing! - -* Quickstart - - This library is all in one file to simplify the most common usage: - ftp it, compile it (-O), and link it into another program. All - of the compile-time options default to reasonable values for use on - most unix platforms. Compile -DWIN32 for reasonable defaults on windows. - You might later want to step through various compile-time and dynamic - tuning options. - - For convenience, an include file for code using this malloc is at: - ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h - You don't really need this .h file unless you call functions not - defined in your system include files. The .h file contains only the - excerpts from this file needed for using this malloc on ANSI C/C++ - systems, so long as you haven't changed compile-time options about - naming and tuning parameters. If you do, then you can create your - own malloc.h that does include all settings by cutting at the point - indicated below. - -* Why use this malloc? - - This is not the fastest, most space-conserving, most portable, or - most tunable malloc ever written. However it is among the fastest - while also being among the most space-conserving, portable and tunable. 
- Consistent balance across these factors results in a good general-purpose - allocator for malloc-intensive programs. - - The main properties of the algorithms are: - * For large (>= 512 bytes) requests, it is a pure best-fit allocator, - with ties normally decided via FIFO (i.e. least recently used). - * For small (<= 64 bytes by default) requests, it is a caching - allocator, that maintains pools of quickly recycled chunks. - * In between, and for combinations of large and small requests, it does - the best it can trying to meet both goals at once. - * For very large requests (>= 128KB by default), it relies on system - memory mapping facilities, if supported. - - For a longer but slightly out of date high-level description, see - http://gee.cs.oswego.edu/dl/html/malloc.html - - You may already by default be using a C library containing a malloc - that is based on some version of this malloc (for example in - linux). You might still want to use the one in this file in order to - customize settings or to avoid overheads associated with library - versions. - -* Contents, described in more detail in "description of public routines" below. - - Standard (ANSI/SVID/...) functions: - malloc(size_t n); - calloc(size_t n_elements, size_t element_size); - free(Void_t* p); - realloc(Void_t* p, size_t n); - memalign(size_t alignment, size_t n); - valloc(size_t n); - mallinfo() - mallopt(int parameter_number, int parameter_value) - - Additional functions: - independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); - independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); - pvalloc(size_t n); - cfree(Void_t* p); - malloc_trim(size_t pad); - malloc_usable_size(Void_t* p); - malloc_stats(); - -* Vital statistics: - - Supported pointer representation: 4 or 8 bytes - Supported size_t representation: 4 or 8 bytes - Note that size_t is allowed to be 4 bytes even if pointers are 8. - You can adjust this by defining INTERNAL_SIZE_T - - Alignment: 2 * sizeof(size_t) (default) - (i.e., 8 byte alignment with 4byte size_t). This suffices for - nearly all current machines and C compilers. However, you can - define MALLOC_ALIGNMENT to be wider than this if necessary. - - Minimum overhead per allocated chunk: 4 or 8 bytes - Each malloced chunk has a hidden word of overhead holding size - and status information. - - Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) - 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) - - When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte - ptrs but 4 byte size) or 24 (for 8/8) additional bytes are - needed; 4 (8) for a trailing size field and 8 (16) bytes for - free list pointers. Thus, the minimum allocatable size is - 16/24/32 bytes. - - Even a request for zero bytes (i.e., malloc(0)) returns a - pointer to something of the minimum allocatable size. - - The maximum overhead wastage (i.e., number of extra bytes - allocated than were requested in malloc) is less than or equal - to the minimum size, except for requests >= mmap_threshold that - are serviced via mmap(), where the worst case wastage is 2 * - sizeof(size_t) bytes plus the remainder from a system page (the - minimal mmap unit); typically 4096 or 8192 bytes. - - Maximum allocated size: 4-byte size_t: 2^32 minus about two pages - 8-byte size_t: 2^64 minus about two pages - - It is assumed that (possibly signed) size_t values suffice to - represent chunk sizes. 
`Possibly signed' is due to the fact
- that `size_t' may be defined on a system as either a signed or
- an unsigned type. The ISO C standard says that it must be
- unsigned, but a few systems are known not to adhere to this.
- Additionally, even when size_t is unsigned, sbrk (which is by
- default used to obtain memory from system) accepts signed
- arguments, and may not be able to handle size_t-wide arguments
- with negative sign bit. Generally, values that would
- appear as negative after accounting for overhead and alignment
- are supported only via mmap(), which does not have this
- limitation.
-
- Requests for sizes outside the allowed range will perform an optional
- failure action and then return null. (Requests may also
- fail because a system is out of memory.)
-
- Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined
-
- When USE_MALLOC_LOCK is defined, wrappers are created to
- surround every public call with either a pthread mutex or
- a win32 spinlock (depending on WIN32). This is not
- especially fast, and can be a major bottleneck.
- It is designed only to provide minimal protection
- in concurrent environments, and to provide a basis for
- extensions. If you are using malloc in a concurrent program,
- you would be far better off obtaining ptmalloc, which is
- derived from a version of this malloc, and is well-tuned for
- concurrent programs. (See http://www.malloc.de) Note that
- even when USE_MALLOC_LOCK is defined, you can guarantee
- full thread-safety only if no threads acquire memory through
- direct calls to MORECORE or other system-level allocators.
-
- Compliance: I believe it is compliant with the 1997 Single Unix Specification
- (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
- others as well.
-
-* Synopsis of compile-time options:
-
- People have reported using previous versions of this malloc on all
- versions of Unix, sometimes by tweaking some of the defines
- below. It has been tested most extensively on Solaris and
- Linux. It is also reported to work on WIN32 platforms.
- People also report using it in stand-alone embedded systems.
-
- The implementation is in straight, hand-tuned ANSI C. It is not
- at all modular. (Sorry!) It uses a lot of macros. To be at all
- usable, this code should be compiled using an optimizing compiler
- (for example gcc -O3) that can simplify expressions and control
- paths. (FAQ: some macros import variables as arguments rather than
- declare locals because people reported that some debuggers
- otherwise get confused.)
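-
- As an illustration only (this command line is not from the original
- sources), a build that renames the API and enables locking might be:
-
-   cc -O3 -DUSE_DL_PREFIX -DUSE_MALLOC_LOCK -c malloc.c
-
- after which a program calls dlmalloc/dlfree rather than malloc/free.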
-
- OPTION                     DEFAULT VALUE
-
- Compilation Environment options:
-
- __STD_C                    derived from C compiler defines
- WIN32                      NOT defined
- HAVE_MEMCPY                defined
- USE_MEMCPY                 1 if HAVE_MEMCPY is defined
- HAVE_MMAP                  defined as 1
- MMAP_CLEARS                1
- HAVE_MREMAP                0 unless linux defined
- malloc_getpagesize         derived from system #includes, or 4096 if not
- HAVE_USR_INCLUDE_MALLOC_H  NOT defined
- LACKS_UNISTD_H             NOT defined unless WIN32
- LACKS_SYS_PARAM_H          NOT defined unless WIN32
- LACKS_SYS_MMAN_H           NOT defined unless WIN32
- LACKS_FCNTL_H              NOT defined
-
- Changing default word sizes:
-
- INTERNAL_SIZE_T            size_t
- MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
- PTR_UINT                   unsigned long
- CHUNK_SIZE_T               unsigned long
-
- Configuration and functionality options:
-
- USE_DL_PREFIX              NOT defined
- USE_PUBLIC_MALLOC_WRAPPERS NOT defined
- USE_MALLOC_LOCK            NOT defined
- DL_DEBUG                   NOT defined
- REALLOC_ZERO_BYTES_FREES   NOT defined
- MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
- TRIM_FASTBINS              0
- FIRST_SORTED_BIN_SIZE      512
-
- Options for customizing MORECORE:
-
- MORECORE                   sbrk
- MORECORE_CONTIGUOUS        1
- MORECORE_CANNOT_TRIM       NOT defined
- MMAP_AS_MORECORE_SIZE      (1024 * 1024)
-
- Tuning options that are also dynamically changeable via mallopt:
-
- DEFAULT_MXFAST             64
- DEFAULT_TRIM_THRESHOLD     256 * 1024
- DEFAULT_TOP_PAD            0
- DEFAULT_MMAP_THRESHOLD     256 * 1024
- DEFAULT_MMAP_MAX           65536
-
- There are several other #defined constants and macros that you
- probably don't want to touch unless you are extending or adapting malloc.
-*/
-
-/*
- WIN32 sets up defaults for MS environment and compilers.
- Otherwise defaults are for unix.
-*/
-
-/* #define WIN32 */
-
-#ifdef WIN32
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* Win32 doesn't supply or need the following headers */
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-
-/* Use the supplied emulation of sbrk */
-#define MORECORE sbrk
-#define MORECORE_CONTIGUOUS 1
-#define MORECORE_FAILURE ((void *)(-1))
-
-/* Use the supplied emulation of mmap and munmap */
-#define HAVE_MMAP 1
-#define MUNMAP_FAILURE (-1)
-#define MMAP_CLEARS 1
-
-/* These values don't really matter in windows mmap emulation */
-#define MAP_PRIVATE 1
-#define MAP_ANONYMOUS 2
-#define PROT_READ 1
-#define PROT_WRITE 2
-
-/* Emulation functions defined at the end of this file */
-
-/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */
-#ifdef USE_MALLOC_LOCK
-static int slwait(int *sl);
-static int slrelease(int *sl);
-#endif
-
-static long getpagesize(void);
-static long getregionsize(void);
-static void *sbrk(long size);
-static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg);
-static long munmap(void *ptr, long size);
-
-static void vminfo(unsigned long *free, unsigned long *reserved, unsigned long *committed);
-static int cpuinfo(int whole, unsigned long *kernel, unsigned long *user);
-
-#endif
-
-/*
- __STD_C should be nonzero if using ANSI-standard C compiler, a C++
- compiler, or a C compiler sufficiently close to ANSI to get away
- with it.
-*/
-
-#ifndef __STD_C
-#if defined(__STDC__) || defined(__cplusplus)
-#define __STD_C 1
-#else
-#define __STD_C 0
-#endif
-#endif /*__STD_C*/
-
-/*
- Void_t* is the pointer type that malloc should say it returns
-*/
-
-#ifndef Void_t
-#if (__STD_C || defined(WIN32))
-#define Void_t void
-#else
-#define Void_t char
-#endif
-#endif /*Void_t*/
-
-#if __STD_C
-#include <stddef.h> /* for size_t */
-#else
-#include <sys/types.h>
-#endif
-
-/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
-
-/* #define LACKS_UNISTD_H */
-
-#ifndef LACKS_UNISTD_H
-#include <unistd.h>
-#endif
-
-/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
-
-/* #define LACKS_SYS_PARAM_H */
-
-#include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */
-#include <stdio.h> /* needed for malloc_stats */
-
-/*
- Debugging:
-
- Because freed chunks may be overwritten with bookkeeping fields, this
- malloc will often die when freed memory is overwritten by user
- programs. This can be very effective (albeit in an annoying way)
- in helping track down dangling pointers.
-
- If you compile with -DDL_DEBUG, a number of assertion checks are
- enabled that will catch more memory errors. You probably won't be
- able to make much sense of the actual assertion errors, but they
- should help you locate incorrectly overwritten memory. The
- checking is fairly extensive, and will slow down execution
- noticeably. Calling malloc_stats or mallinfo with DL_DEBUG set will
- attempt to check every non-mmapped allocated and free chunk in the
- course of computing the summaries. (By nature, mmapped regions
- cannot be checked very much automatically.)
-
- Setting DL_DEBUG may also be helpful if you are trying to modify
- this code. The assertions in the check routines spell out in more
- detail the assumptions and invariants underlying the algorithms.
-
- Setting DL_DEBUG does NOT provide an automated mechanism for checking
- that all accesses to malloced memory stay within their
- bounds. However, there are several add-ons and adaptations of this
- or other mallocs available that do this.
-*/
-
-#include <assert.h>
-
-/*
- The unsigned integer type used for comparing any two chunk sizes.
- This should be at least as wide as size_t, but should not be signed.
-*/
-
-#ifndef CHUNK_SIZE_T
-#define CHUNK_SIZE_T unsigned long
-#endif
-
-/*
- The unsigned integer type used to hold addresses when they are
- manipulated as integers. Except that it is not defined on all
- systems, intptr_t would suffice.
-*/
-#ifndef PTR_UINT
-#define PTR_UINT unsigned long
-#endif
-
-/*
- INTERNAL_SIZE_T is the word-size used for internal bookkeeping
- of chunk sizes.
-
- The default version is the same as size_t.
-
- While not strictly necessary, it is best to define this as an
- unsigned type, even if size_t is a signed type. This may avoid some
- artificial size limitations on some systems.
-
- On a 64-bit machine, you may be able to reduce malloc overhead by
- defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
- expense of not being able to handle more than 2^32 of malloced
- space. If this limitation is acceptable, you are encouraged to set
- this unless you are on a platform requiring 16byte alignments. In
- this case the alignment requirements turn out to negate any
- potential advantages of decreasing size_t word size.
-
- Implementors: Beware of the possible combinations of:
-   - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
-     and might be the same width as int or as long
-   - size_t might have different width and signedness than INTERNAL_SIZE_T
-   - int and long might be 32 or 64 bits, and might be the same width
- To deal with this, most comparisons and difference computations
- among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
- aware of the fact that casting an unsigned int to a wider long does
- not sign-extend. (This also makes checking for negative numbers
- awkward.) Some of these casts result in harmless compiler warnings
- on some systems.
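-
- As an illustration only (a sketch, not code from this file), the
- comparison idiom described above looks like:
-
-   INTERNAL_SIZE_T oldsize = 16;
-   INTERNAL_SIZE_T newsize = 32;
-   if ((CHUNK_SIZE_T)(newsize) > (CHUNK_SIZE_T)(oldsize)) {
-     /* grow; the casts avoid signedness and width surprises */
-   }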
-*/ - -#ifndef INTERNAL_SIZE_T -#define INTERNAL_SIZE_T size_t -#endif - -/* The corresponding word size */ -#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) - -/* - MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. - It must be a power of two at least 2 * SIZE_SZ, even on machines - for which smaller alignments would suffice. It may be defined as - larger than this though. Note however that code and data structures - are optimized for the case of 8-byte alignment. -*/ - -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT (2 * SIZE_SZ) -#endif - -/* The corresponding bit mask value */ -#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) - -/* - REALLOC_ZERO_BYTES_FREES should be set if a call to - realloc with zero bytes should be the same as a call to free. - Some people think it should. Otherwise, since this malloc - returns a unique pointer for malloc(0), so does realloc(p, 0). -*/ - -/* #define REALLOC_ZERO_BYTES_FREES */ - -/* - TRIM_FASTBINS controls whether free() of a very small chunk can - immediately lead to trimming. Setting to true (1) can reduce memory - footprint, but will almost always slow down programs that use a lot - of small chunks. - - Define this only if you are willing to give up some speed to more - aggressively reduce system-level memory footprint when releasing - memory in programs that use many small chunks. You can get - essentially the same effect by setting MXFAST to 0, but this can - lead to even greater slowdowns in programs using many small chunks. - TRIM_FASTBINS is an in-between compile-time option, that disables - only those chunks bordering topmost memory from being placed in - fastbins. -*/ - -#ifndef TRIM_FASTBINS -#define TRIM_FASTBINS 0 -#endif - -/* - USE_DL_PREFIX will prefix all public routines with the string 'dl'. - This is necessary when you only want to use this malloc in one part - of a program, using your regular system malloc elsewhere. -*/ - -/* #define USE_DL_PREFIX */ - -/* - USE_MALLOC_LOCK causes wrapper functions to surround each - callable routine with pthread mutex lock/unlock. - - USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined -*/ - -/* #define USE_MALLOC_LOCK */ - -/* - If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is - actually a wrapper function that first calls MALLOC_PREACTION, then - calls the internal routine, and follows it with - MALLOC_POSTACTION. This is needed for locking, but you can also use - this, without USE_MALLOC_LOCK, for purposes of interception, - instrumentation, etc. It is a sad fact that using wrappers often - noticeably degrades performance of malloc-intensive programs. -*/ - -#ifdef USE_MALLOC_LOCK -#define USE_PUBLIC_MALLOC_WRAPPERS -#else -/* #define USE_PUBLIC_MALLOC_WRAPPERS */ -#endif - -/* - Two-phase name translation. - All of the actual routines are given mangled names. - When wrappers are used, they become the public callable versions. - When DL_PREFIX is used, the callable names are prefixed. 
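-
- For example (illustrative only): with USE_DL_PREFIX defined and
- wrappers disabled,
-
-   Void_t* p = dlmalloc(32);
-   dlfree(p);
-
- reaches the internal mALLOc and fREe routines through the macros
- defined just below.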
-*/ - -#ifndef USE_PUBLIC_MALLOC_WRAPPERS -#define cALLOc public_cALLOc -#define fREe public_fREe -#define cFREe public_cFREe -#define mALLOc public_mALLOc -#define mEMALIGn public_mEMALIGn -#define rEALLOc public_rEALLOc -#define vALLOc public_vALLOc -#define pVALLOc public_pVALLOc -#define mALLINFo public_mALLINFo -#define mALLOPt public_mALLOPt -#define mTRIm public_mTRIm -#define mSTATs public_mSTATs -#define mUSABLe public_mUSABLe -#define iCALLOc public_iCALLOc -#define iCOMALLOc public_iCOMALLOc -#endif - -#ifdef USE_DL_PREFIX -#define public_cALLOc dlcalloc -#define public_fREe dlfree -#define public_cFREe dlcfree -#define public_mALLOc dlmalloc -#define public_mEMALIGn dlmemalign -#define public_rEALLOc dlrealloc -#define public_vALLOc dlvalloc -#define public_pVALLOc dlpvalloc -#define public_mALLINFo dlmallinfo -#define public_mALLOPt dlmallopt -#define public_mTRIm dlmalloc_trim -#define public_mSTATs dlmalloc_stats -#define public_mUSABLe dlmalloc_usable_size -#define public_iCALLOc dlindependent_calloc -#define public_iCOMALLOc dlindependent_comalloc -#else /* USE_DL_PREFIX */ -#define public_cALLOc calloc -#define public_fREe free -#define public_cFREe cfree -#define public_mALLOc malloc -#define public_mEMALIGn memalign -#define public_rEALLOc realloc -#define public_vALLOc valloc -#define public_pVALLOc pvalloc -#define public_mALLINFo mallinfo -#define public_mALLOPt mallopt -#define public_mTRIm malloc_trim -#define public_mSTATs malloc_stats -#define public_mUSABLe malloc_usable_size -#define public_iCALLOc independent_calloc -#define public_iCOMALLOc independent_comalloc -#endif /* USE_DL_PREFIX */ - -/* - HAVE_MEMCPY should be defined if you are not otherwise using - ANSI STD C, but still have memcpy and memset in your C library - and want to use them in calloc and realloc. Otherwise simple - macro versions are defined below. - - USE_MEMCPY should be defined as 1 if you actually want to - have memset and memcpy called. People report that the macro - versions are faster than libc versions on some systems. - - Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks - (of <= 36 bytes) are manually unrolled in realloc and calloc. -*/ - -#define HAVE_MEMCPY - -#ifndef USE_MEMCPY -#ifdef HAVE_MEMCPY -#define USE_MEMCPY 1 -#else -#define USE_MEMCPY 0 -#endif -#endif - -#if (__STD_C || defined(HAVE_MEMCPY)) - -#ifdef WIN32 -/* On Win32 memset and memcpy are already declared in windows.h */ -#else -#if __STD_C -void *memset(void *, int, size_t); -void *memcpy(void *, const void *, size_t); -#else -Void_t *memset(); -Void_t *memcpy(); -#endif -#endif -#endif - -/* - MALLOC_FAILURE_ACTION is the action to take before "return 0" when - malloc fails to be able to return memory, either because memory is - exhausted or because of illegal arguments. - - By default, sets errno if running on STD_C platform, else does nothing. -*/ - -#ifndef MALLOC_FAILURE_ACTION -#if __STD_C -#define MALLOC_FAILURE_ACTION \ - errno = ENOMEM; - -#else -#define MALLOC_FAILURE_ACTION -#endif -#endif - -/* - MORECORE-related declarations. By default, rely on sbrk -*/ - -#ifdef LACKS_UNISTD_H -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) -#if __STD_C -extern Void_t *sbrk(ptrdiff_t); -#else -extern Void_t *sbrk(); -#endif -#endif -#endif - -/* - MORECORE is the name of the routine to call to obtain more memory - from the system. 
See below for general guidance on writing
- alternative MORECORE functions, as well as a version for WIN32 and a
- sample version for pre-OSX macos.
-*/
-
-// #define _GNU_SOURCE
-// #include <unistd.h>
-extern void *sbrk(intptr_t __delta) __THROW;
-#define MORECORE sbrk
-
-/*
- MORECORE_FAILURE is the value returned upon failure of MORECORE
- as well as mmap. Since it cannot be an otherwise valid memory address,
- and must reflect values of standard sys calls, you probably ought not
- try to redefine it.
-*/
-
-#ifndef MORECORE_FAILURE
-#define MORECORE_FAILURE (-1)
-#endif
-
-/*
- If MORECORE_CONTIGUOUS is true, take advantage of the fact that
- consecutive calls to MORECORE with positive arguments always return
- contiguous increasing addresses. This is true of unix sbrk. Even
- if not defined, when regions happen to be contiguous, malloc will
- permit allocations spanning regions obtained from different
- calls. But defining this when applicable enables some stronger
- consistency checks and space efficiencies.
-*/
-
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif
-
-/*
- Define MORECORE_CANNOT_TRIM if your version of MORECORE
- cannot release space back to the system when given negative
- arguments. This is generally necessary only if you are using
- a hand-crafted MORECORE function that cannot handle negative arguments.
-*/
-
-/* #define MORECORE_CANNOT_TRIM */
-
-/*
- Define HAVE_MMAP as true to optionally make malloc() use mmap() to
- allocate very large blocks. These will be returned to the
- operating system immediately after a free(). Also, if mmap
- is available, it is used as a backup strategy in cases where
- MORECORE fails to provide space from system.
-
- This malloc is best tuned to work with mmap for large requests.
- If you do not have mmap, operations involving very large chunks (1MB
- or so) may be slower than you'd like.
-*/
-
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif
-
-#if HAVE_MMAP
-/*
- Standard unix mmap using /dev/zero clears memory so calloc doesn't
- need to.
-*/
-
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif
-
-#else /* no mmap */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 0
-#endif
-#endif
-
-/*
- MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
- sbrk fails, and mmap is used as a backup (which is done only if
- HAVE_MMAP). The value must be a multiple of page size. This
- backup strategy generally applies only when systems have "holes" in
- address space, so sbrk cannot perform contiguous expansion, but
- there is still space available on system. On systems for which
- this is known to be useful (i.e. most linux kernels), this occurs
- only when programs allocate huge amounts of memory. Between this,
- and the fact that mmap regions tend to be limited, the size should
- be large, to avoid too many mmap calls and thus avoid running out
- of kernel resources.
-*/
-
-#ifndef MMAP_AS_MORECORE_SIZE
-#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
-#endif
-
-/*
- Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
- large blocks. This is currently only possible on Linux with
- kernel versions newer than 1.3.77.
-*/
-
-#ifndef HAVE_MREMAP
-#if defined(linux) && defined(__USE_GNU)
-#define HAVE_MREMAP 1
-#else
-#define HAVE_MREMAP 0
-#endif
-
-#endif /* HAVE_MREMAP */
-
-/*
- The system page size. To the extent possible, this malloc manages
- memory from the system in page-size units. Note that this value is
- cached during initialization into a field of malloc_state.
- So even if malloc_getpagesize is a function, it is only called once.
-
- The following mechanics for getpagesize were adapted from bsd/gnu
- getpagesize.h. If none of the system-probes here apply, a value of
- 4096 is used, which should be OK: If they don't apply, then using
- the actual value probably doesn't impact performance.
-*/
-
-#ifndef malloc_getpagesize
-
-#ifndef LACKS_UNISTD_H
-#include <unistd.h>
-#endif
-
-#ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
-#ifndef _SC_PAGE_SIZE
-#define _SC_PAGE_SIZE _SC_PAGESIZE
-#endif
-#endif
-
-#ifdef _SC_PAGE_SIZE
-#define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-#else
-#if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
-extern size_t getpagesize();
-#define malloc_getpagesize getpagesize()
-#else
-#ifdef WIN32 /* use supplied emulation of getpagesize */
-#define malloc_getpagesize getpagesize()
-#else
-#ifndef LACKS_SYS_PARAM_H
-#include <sys/param.h>
-#endif
-#ifdef EXEC_PAGESIZE
-#define malloc_getpagesize EXEC_PAGESIZE
-#else
-#ifdef NBPG
-#ifndef CLSIZE
-#define malloc_getpagesize NBPG
-#else
-#define malloc_getpagesize (NBPG * CLSIZE)
-#endif
-#else
-#ifdef NBPC
-#define malloc_getpagesize NBPC
-#else
-#ifdef PAGESIZE
-#define malloc_getpagesize PAGESIZE
-#else /* just guess */
-#define malloc_getpagesize (4096)
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-
-/*
- This version of malloc supports the standard SVID/XPG mallinfo
- routine that returns a struct containing usage properties and
- statistics. It should work on any SVID/XPG compliant system that has
- a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
- install such a thing yourself, cut out the preliminary declarations
- as described above and below and save them in a malloc.h file. But
- there's no compelling reason to bother to do this.)
-
- The main declaration needed is the mallinfo struct that is returned
- (by-copy) by mallinfo(). The SVID/XPG mallinfo struct contains a
- bunch of fields that are not even meaningful in this version of
- malloc. These fields are instead filled by mallinfo() with
- other numbers that might be of interest.
-
- HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
- /usr/include/malloc.h file that includes a declaration of struct
- mallinfo. If so, it is included; else an SVID2/XPG2 compliant
- version is declared below. These must be precisely the same for
- mallinfo() to work. The original SVID version of this struct,
- defined on most systems with mallinfo, declares all fields as
- ints. But some others define them as unsigned long. If your system
- defines the fields using a type of different width than listed here,
- you must #include your system version and #define
- HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
-#include "/usr/include/malloc.h"
-#else
-
-/* SVID2/XPG mallinfo structure */
-
-/*
- SVID/XPG defines four standard parameter numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports other
- options in mallopt described below.
-*/
-#endif
-
-/* ---------- description of public routines ------------ */
-
-/*
- malloc(size_t n)
- Returns a pointer to a newly allocated chunk of at least n bytes, or null
- if no space is available. Additionally, on failure, errno is
- set to ENOMEM on ANSI C systems.
-
- If n is zero, malloc returns a minimum-sized chunk.
- (The minimum size is 16 bytes on most 32bit systems, and 24 or 32
- bytes on 64bit systems.) On most systems, size_t is an unsigned type,
- so calls with negative arguments are interpreted as requests for huge
- amounts of space, which will often fail. The maximum supported value
- of n differs across systems, but is in all cases less than the
- maximum representable value of a size_t.
-*/
-#if __STD_C
-Void_t *public_mALLOc(size_t);
-#else
-Void_t *public_mALLOc();
-#endif
-
-/*
- free(Void_t* p)
- Releases the chunk of memory pointed to by p, that had been previously
- allocated using malloc or a related routine such as realloc.
- It has no effect if p is null. It can have arbitrary (i.e., bad!)
- effects if p has already been freed.
-
- Unless disabled (using mallopt), freeing very large spaces will,
- when possible, automatically trigger operations that give
- back unused memory to the system, thus reducing program footprint.
-*/
-#if __STD_C
-void public_fREe(Void_t *);
-#else
-void public_fREe();
-#endif
-
-/*
- calloc(size_t n_elements, size_t element_size);
- Returns a pointer to n_elements * element_size bytes, with all locations
- set to zero.
-*/
-#if __STD_C
-Void_t *public_cALLOc(size_t, size_t);
-#else
-Void_t *public_cALLOc();
-#endif
-
-/*
- realloc(Void_t* p, size_t n)
- Returns a pointer to a chunk of size n that contains the same data
- as does chunk p up to the minimum of (n, p's size) bytes, or null
- if no space is available.
-
- The returned pointer may or may not be the same as p. The algorithm
- prefers extending p when possible, otherwise it employs the
- equivalent of a malloc-copy-free sequence.
-
- If p is null, realloc is equivalent to malloc.
-
- If space is not available, realloc returns null, errno is set (if on
- ANSI) and p is NOT freed.
-
- If n is for fewer bytes than already held by p, the newly unused
- space is lopped off and freed if possible. Unless the #define
- REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
- zero (re)allocates a minimum-sized chunk.
-
- Large chunks that were internally obtained via mmap will always
- be reallocated using malloc-copy-free sequences unless
- the system supports MREMAP (currently only linux).
-
- The old unix realloc convention of allowing the last-free'd chunk
- to be used as an argument to realloc is not supported.
-*/
-#if __STD_C
-Void_t *public_rEALLOc(Void_t *, size_t);
-#else
-Void_t *public_rEALLOc();
-#endif
-
-/*
- memalign(size_t alignment, size_t n);
- Returns a pointer to a newly allocated chunk of n bytes, aligned
- in accord with the alignment argument.
-
- The alignment argument should be a power of two. If the argument is
- not a power of two, the nearest greater power is used.
- 8-byte alignment is guaranteed by normal malloc calls, so don't
- bother calling memalign with an argument of 8 or less.
-
- Overreliance on memalign is a sure way to fragment space.
-*/
-#if __STD_C
-Void_t *public_mEMALIGn(size_t, size_t);
-#else
-Void_t *public_mEMALIGn();
-#endif
-
-/*
- valloc(size_t n);
- Equivalent to memalign(pagesize, n), where pagesize is the page
- size of the system. If the pagesize is unknown, 4096 is used.
-*/
-#if __STD_C
-Void_t *public_vALLOc(size_t);
-#else
-Void_t *public_vALLOc();
-#endif
-
-/*
- mallopt(int parameter_number, int parameter_value)
- Sets tunable parameters. The format is to provide a
- (parameter-number, parameter-value) pair. mallopt then sets the
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. SVID/XPG/ANSI defines four standard param numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports four
- other options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol            param #  default    allowed param values
- M_MXFAST          1        64         0-80 (0 disables fastbins)
- M_TRIM_THRESHOLD  -1       256*1024   any (-1U disables trimming)
- M_TOP_PAD         -2       0          any
- M_MMAP_THRESHOLD  -3       256*1024   any (or 0 if no MMAP support)
- M_MMAP_MAX        -4       65536      any (0 disables use of mmap)
-*/
-#if __STD_C
-int public_mALLOPt(int, int);
-#else
-int public_mALLOPt();
-#endif
-
-/*
- mallinfo()
- Returns (by copy) a struct containing various summary statistics:
-
- arena:     current total non-mmapped bytes allocated from system
- ordblks:   the number of free chunks
- smblks:    the number of fastbin blocks (i.e., small chunks that
-            have been freed but not yet reused or consolidated)
- hblks:     current number of mmapped regions
- hblkhd:    total bytes held in mmapped regions
- usmblks:   the maximum total allocated space. This will be greater
-            than current total if trimming has occurred.
- fsmblks:   total bytes held in fastbin blocks
- uordblks:  current total allocated space (normal or mmapped)
- fordblks:  total free space
- keepcost:  the maximum number of bytes that could ideally be released
-            back to system via malloc_trim. ("ideally" means that
-            it ignores page restrictions etc.)
-
- Because these fields are ints, but internal bookkeeping may
- be kept as longs, the reported values may wrap around zero and
- thus be inaccurate.
-*/
-#if __STD_C
-struct mallinfo public_mALLINFo(void);
-#else
-struct mallinfo public_mALLINFo();
-#endif
-
-/*
- independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
-
- independent_calloc is similar to calloc, but instead of returning a
- single cleared space, it returns an array of pointers to n_elements
- independent elements that can hold contents of size elem_size, each
- of which starts out cleared, and can be independently freed,
- realloc'ed etc. The elements are guaranteed to be adjacently
- allocated (this is not guaranteed to occur with multiple callocs or
- mallocs), which may also improve cache locality in some
- applications.
-
- The "chunks" argument is optional (i.e., may be null, which is
- probably the most typical usage). If it is null, the returned array
- is itself dynamically allocated and should also be freed when it is
- no longer needed. Otherwise, the chunks array must be of at least
- n_elements in length. It is filled in with the pointers to the
- chunks.
-
- In either case, independent_calloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and "chunks"
- is null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use regular calloc and assign pointers into this
- space to represent elements. (In this case though, you cannot
- independently free elements.)
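-
- For contrast, a sketch (illustrative only, not code from this file;
- struct Elem and n stand in for any element type and count) of the
- free-all-at-once alternative described above:
-
-   /* one calloc'ed block of n elements; none individually freeable */
-   struct Elem* pool = (struct Elem*) calloc(n, sizeof(struct Elem));
-   if (pool == 0) die();
-   /* ... use pool[0] through pool[n-1] as the elements ... */
-   free(pool); /* releases every element at once */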
-
- independent_calloc simplifies and speeds up implementations of many
- kinds of pools. It may also be useful when constructing large data
- structures that initially have a fixed number of fixed-sized nodes,
- but the number is not known at compile time, and some of the nodes
- may later need to be freed. For example:
-
- struct Node { int item; struct Node* next; };
-
- struct Node* build_list() {
-   struct Node** pool;
-   int n = read_number_of_nodes_needed();
-   int i;
-   if (n <= 0) return 0;
-   pool = (struct Node**) independent_calloc(n, sizeof(struct Node), 0);
-   if (pool == 0) die();
-   // organize into a linked list...
-   struct Node* first = pool[0];
-   for (i = 0; i < n-1; ++i)
-     pool[i]->next = pool[i+1];
-   free(pool); // Can now free the array (or not, if it is needed later)
-   return first;
- }
-*/
-#if __STD_C
-Void_t **public_iCALLOc(size_t, size_t, Void_t **);
-#else
-Void_t **public_iCALLOc();
-#endif
-
-/*
- independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
-
- independent_comalloc allocates, all at once, a set of n_elements
- chunks with sizes indicated in the "sizes" array. It returns
- an array of pointers to these elements, each of which can be
- independently freed, realloc'ed etc. The elements are guaranteed to
- be adjacently allocated (this is not guaranteed to occur with
- multiple callocs or mallocs), which may also improve cache locality
- in some applications.
-
- The "chunks" argument is optional (i.e., may be null). If it is null
- the returned array is itself dynamically allocated and should also
- be freed when it is no longer needed. Otherwise, the chunks array
- must be of at least n_elements in length. It is filled in with the
- pointers to the chunks.
-
- In either case, independent_comalloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and chunks is
- null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use a single regular malloc, and assign pointers at
- particular offsets in the aggregate space. (In this case though, you
- cannot independently free elements.)
-
- independent_comalloc differs from independent_calloc in that each
- element may have a different size, and also that it does not
- automatically clear elements.
-
- independent_comalloc can be used to speed up allocation in cases
- where several structs or objects must always be allocated at the
- same time. For example:
-
- struct Head { ... };
- struct Foot { ... };
-
- void send_message(char* msg) {
-   int msglen = strlen(msg);
-   size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
-   void* chunks[3];
-   if (independent_comalloc(3, sizes, chunks) == 0)
-     die();
-   struct Head* head = (struct Head*)(chunks[0]);
-   char* body = (char*)(chunks[1]);
-   struct Foot* foot = (struct Foot*)(chunks[2]);
-   // ...
- }
-
- In general though, independent_comalloc is worth using only for
- larger values of n_elements. For small values, you probably won't
- detect enough difference from series of malloc calls to bother.
-
- Overuse of independent_comalloc can increase overall memory usage,
- since it cannot reuse existing noncontiguous small chunks that
- might be available for some of the elements.
-
-*/
-#if __STD_C
-Void_t **public_iCOMALLOc(size_t, size_t *, Void_t **);
-#else
-Void_t **public_iCOMALLOc();
-#endif
-
-/*
- pvalloc(size_t n);
- Equivalent to valloc(minimum-page-that-holds(n)), that is,
- round up n to nearest pagesize.
- */
-#if __STD_C
-Void_t *public_pVALLOc(size_t);
-#else
-Void_t *public_pVALLOc();
-#endif
-
-/*
- cfree(Void_t* p);
- Equivalent to free(p).
-
- cfree is needed/defined on some systems that pair it with calloc,
- for odd historical reasons (such as: cfree is used in example
- code in the first edition of K&R).
-*/
-#if __STD_C
-void public_cFREe(Void_t *);
-#else
-void public_cFREe();
-#endif
-
-/*
- malloc_trim(size_t pad);
-
- If possible, gives memory back to the system (via negative
- arguments to sbrk) if there is unused memory at the `high' end of
- the malloc pool. You can call this after freeing large blocks of
- memory to potentially reduce the system-level memory requirements
- of a program. However, it cannot guarantee to reduce memory. Under
- some allocation patterns, some large free blocks of memory will be
- locked between two used chunks, so they cannot be given back to
- the system.
-
- The `pad' argument to malloc_trim represents the amount of free
- trailing space to leave untrimmed. If this argument is zero,
- only the minimum amount of memory to maintain internal data
- structures will be left (one page or less). Non-zero arguments
- can be supplied to maintain enough trailing space to service
- future expected allocations without having to re-obtain memory
- from the system.
-
- Malloc_trim returns 1 if it actually released any memory, else 0.
- On systems that do not support "negative sbrks", it will always
- return 0.
-*/
-#if __STD_C
-int public_mTRIm(size_t);
-#else
-int public_mTRIm();
-#endif
-
-/*
- malloc_usable_size(Void_t* p);
-
- Returns the number of bytes you can actually use in
- an allocated chunk, which may be more than you requested (although
- often not) due to alignment and minimum size constraints.
- You can use this many bytes without worrying about
- overwriting other allocated objects. This is not a particularly great
- programming practice. malloc_usable_size can be more useful in
- debugging and assertions, for example:
-
-   p = malloc(n);
-   assert(malloc_usable_size(p) >= 256);
-
-*/
-#if __STD_C
-size_t public_mUSABLe(Void_t *);
-#else
-size_t public_mUSABLe();
-#endif
-
-/*
- malloc_stats();
- Prints on stderr the amount of space obtained from the system (both
- via sbrk and mmap), the maximum amount (which may be more than
- current if malloc_trim and/or munmap got called), and the current
- number of bytes allocated via malloc (or realloc, etc) but not yet
- freed. Note that this is the number of bytes allocated, not the
- number requested. It will be larger than the number requested
- because of alignment and bookkeeping overhead. Because it includes
- alignment wastage as being in use, this figure may be greater than
- zero even when no user-level chunks are allocated.
-
- The reported current and maximum system memory can be inaccurate if
- a program makes other calls to system memory allocation functions
- (normally sbrk) outside of malloc.
-
- malloc_stats prints only the most commonly interesting statistics.
- More information can be obtained by calling mallinfo.
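-
- For example (a sketch only; the field names are those of the
- mallinfo struct described above):
-
-   malloc_stats(); /* prints the summary on stderr */
-   struct mallinfo mi = mallinfo();
-   fprintf(stderr, "allocated: %d free: %d\n", mi.uordblks, mi.fordblks);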
-
-*/
-#if __STD_C
-void public_mSTATs(void);
-#else
-void public_mSTATs();
-#endif
-
-/* mallopt tuning options */
-
-/*
- M_MXFAST is the maximum request size used for "fastbins", special bins
- that hold returned chunks without consolidating their spaces. This
- enables future requests for chunks of the same size to be handled
- very quickly, but can increase fragmentation, and thus increase the
- overall memory footprint of a program.
-
- This malloc manages fastbins very conservatively yet still
- efficiently, so fragmentation is rarely a problem for values less
- than or equal to the default. The maximum supported value of MXFAST
- is 80. You wouldn't want it any higher than this anyway. Fastbins
- are designed especially for use with many small structs, objects or
- strings -- the default handles structs/objects/arrays with sizes up
- to 16 4byte fields, or small strings representing words, tokens,
- etc. Using fastbins for larger objects normally worsens
- fragmentation without improving speed.
-
- M_MXFAST is set in REQUEST size units. It is internally used in
- chunksize units, which adds padding and alignment. You can reduce
- M_MXFAST to 0 to disable all use of fastbins. This causes the malloc
- algorithm to be a closer approximation of fifo-best-fit in all cases,
- not just for larger requests, but will generally cause it to be
- slower.
-*/
-
-/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
-#ifndef M_MXFAST
-#define M_MXFAST 1
-#endif
-
-#ifndef DEFAULT_MXFAST
-#define DEFAULT_MXFAST 64
-#endif
-
-/*
- M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
- to keep before releasing via malloc_trim in free().
-
- Automatic trimming is mainly useful in long-lived programs.
- Because trimming via sbrk can be slow on some systems, and can
- sometimes be wasteful (in cases where programs immediately
- afterward allocate more large chunks) the value should be high
- enough so that your overall system performance would improve by
- releasing this much memory.
-
- The trim threshold and the mmap control parameters (see below)
- can be traded off with one another. Trimming and mmapping are
- two different ways of releasing unused memory back to the
- system. Between these two, it is often possible to keep
- system-level demands of a long-lived program down to a bare
- minimum. For example, in one test suite of sessions measuring
- the XF86 X server on Linux, using a trim threshold of 128K and a
- mmap threshold of 192K led to near-minimal long term resource
- consumption.
-
- If you are using this malloc in a long-lived program, it should
- pay to experiment with these values. As a rough guide, you
- might set to a value close to the average size of a process
- (program) running on your system. Releasing this much memory
- would allow such a process to run in memory. Generally, it's
- worth it to tune for trimming rather than memory mapping when a
- program undergoes phases where several large chunks are
- allocated and released in ways that can reuse each other's
- storage, perhaps mixed with phases where there are no such
- chunks at all. And in well-behaved long-lived programs,
- controlling release of large blocks via trimming versus mapping
- is usually faster.
-
- However, in most programs, these parameters serve mainly as
- protection against the system-level effects of carrying around
- massive amounts of unneeded memory.
Since frequent calls to - sbrk, mmap, and munmap otherwise degrade performance, the default - parameters are set to relatively high values that serve only as - safeguards. - - The trim value must be greater than page size to have any useful - effect. To disable trimming completely, you can set to - (unsigned long)(-1) - - Trim settings interact with fastbin (MXFAST) settings: Unless - TRIM_FASTBINS is defined, automatic trimming never takes place upon - freeing a chunk with size less than or equal to MXFAST. Trimming is - instead delayed until subsequent freeing of larger chunks. However, - you can still force an attempted trim by calling malloc_trim. - - Also, trimming is not generally possible in cases where - the main arena is obtained via mmap. - - Note that the trick some people use of mallocing a huge space and - then freeing it at program startup, in an attempt to reserve system - memory, doesn't have the intended effect under automatic trimming, - since that memory will immediately be returned to the system. -*/ - -#define M_TRIM_THRESHOLD -1 - -#ifndef DEFAULT_TRIM_THRESHOLD -#define DEFAULT_TRIM_THRESHOLD (256 * 1024) -#endif - -/* - M_TOP_PAD is the amount of extra `padding' space to allocate or - retain whenever sbrk is called. It is used in two ways internally: - - * When sbrk is called to extend the top of the arena to satisfy - a new malloc request, this much padding is added to the sbrk - request. - - * When malloc_trim is called automatically from free(), - it is used as the `pad' argument. - - In both cases, the actual amount of padding is rounded - so that the end of the arena is always a system page boundary. - - The main reason for using padding is to avoid calling sbrk so - often. Having even a small pad greatly reduces the likelihood - that nearly every malloc request during program start-up (or - after trimming) will invoke sbrk, which needlessly wastes - time. - - Automatic rounding-up to page-size units is normally sufficient - to avoid measurable overhead, so the default is 0. However, in - systems where sbrk is relatively slow, it can pay to increase - this value, at the expense of carrying around more memory than - the program needs. -*/ - -#define M_TOP_PAD -2 - -#ifndef DEFAULT_TOP_PAD -#define DEFAULT_TOP_PAD (0) -#endif - -/* - M_MMAP_THRESHOLD is the request size threshold for using mmap() - to service a request. Requests of at least this size that cannot - be allocated using already-existing space will be serviced via mmap. - (If enough normal freed space already exists it is used instead.) - - Using mmap segregates relatively large chunks of memory so that - they can be individually obtained and released from the host - system. A request serviced through mmap is never reused by any - other request (at least not directly; the system may just so - happen to remap successive requests to the same locations). - - Segregating space in this way has the benefits that: - - 1. Mmapped space can ALWAYS be individually released back - to the system, which helps keep the system level memory - demands of a long-lived program low. - 2. Mapped memory can never become `locked' between - other chunks, as can happen with normally allocated chunks, which - means that even trimming via malloc_trim would not release them. - 3. On some systems with "holes" in address spaces, mmap can obtain - memory that sbrk cannot. - - However, it has the disadvantages that: - - 1. 
-     The space cannot be reclaimed, consolidated, and then
-     used to service later requests, as happens with normal chunks.
-  2. It can lead to more wastage because of mmap page alignment
-     requirements
-  3. It causes malloc performance to be more dependent on host
-     system memory management support routines which may vary in
-     implementation quality and may impose arbitrary
-     limitations. Generally, servicing a request via normal
-     malloc steps is faster than going through a system's mmap.
-
-  The advantages of mmap nearly always outweigh disadvantages for
-  "large" chunks, but the value of "large" varies across systems. The
-  default is an empirically derived value that works well in most
-  systems.
-*/
-
-#define M_MMAP_THRESHOLD -3
-
-#ifndef DEFAULT_MMAP_THRESHOLD
-#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
-#endif
-
-/*
-  M_MMAP_MAX is the maximum number of requests to simultaneously
-  service using mmap. This parameter exists because some systems
-  have a limited number of internal tables for use by mmap, and
-  using more than a few of them may degrade performance.
-
-  The default is set to a value that serves only as a safeguard.
-  Setting to 0 disables use of mmap for servicing large requests. If
-  HAVE_MMAP is not set, the default value is 0, and attempts to set it
-  to non-zero values in mallopt will fail.
-*/
-
-#define M_MMAP_MAX -4
-
-#ifndef DEFAULT_MMAP_MAX
-#if HAVE_MMAP
-#define DEFAULT_MMAP_MAX (65536)
-#else
-#define DEFAULT_MMAP_MAX (0)
-#endif
-#endif
-
-/*
-  ========================================================================
-  To make a fully customizable malloc.h header file, cut everything
-  above this line, put into file malloc.h, edit to suit, and #include it
-  on the next line, as well as in programs that use this malloc.
-  ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/* --------------------- public wrappers ---------------------- */
-
-#ifdef USE_PUBLIC_MALLOC_WRAPPERS
-
-/* Declare all routines as internal */
-#if __STD_C
-static Void_t *mALLOc(size_t);
-static void fREe(Void_t *);
-static Void_t *rEALLOc(Void_t *, size_t);
-static Void_t *mEMALIGn(size_t, size_t);
-static Void_t *vALLOc(size_t);
-static Void_t *pVALLOc(size_t);
-static Void_t *cALLOc(size_t, size_t);
-static Void_t **iCALLOc(size_t, size_t, Void_t **);
-static Void_t **iCOMALLOc(size_t, size_t *, Void_t **);
-static void cFREe(Void_t *);
-static int mTRIm(size_t);
-static size_t mUSABLe(Void_t *);
-static void mSTATs(void);
-static int mALLOPt(int, int);
-static struct mallinfo mALLINFo(void);
-#else
-static Void_t *mALLOc();
-static void fREe();
-static Void_t *rEALLOc();
-static Void_t *mEMALIGn();
-static Void_t *vALLOc();
-static Void_t *pVALLOc();
-static Void_t *cALLOc();
-static Void_t **iCALLOc();
-static Void_t **iCOMALLOc();
-static void cFREe();
-static int mTRIm();
-static size_t mUSABLe();
-static void mSTATs();
-static int mALLOPt();
-static struct mallinfo mALLINFo();
-#endif
-
-/*
-  MALLOC_PREACTION and MALLOC_POSTACTION should be
-  defined to return 0 on success, and nonzero on failure.
-  The return value of MALLOC_POSTACTION is currently ignored
-  in wrapper functions since there is no reasonable default
-  action to take on failure.
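-
-  For instance, a program could supply its own actions before this
-  file is compiled; a minimal sketch, where foo_lock/foo_unlock stand
-  in for whatever synchronization the host program already has:
-
-      #define MALLOC_PREACTION (foo_lock(), 0)
-      #define MALLOC_POSTACTION (foo_unlock(), 0)
-
-  Both expand to expressions yielding 0, meaning success.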
-*/
-
-#ifdef USE_MALLOC_LOCK
-
-#ifdef WIN32
-
-static int mALLOC_MUTEx;
-#define MALLOC_PREACTION slwait(&mALLOC_MUTEx)
-#define MALLOC_POSTACTION slrelease(&mALLOC_MUTEx)
-
-#else
-
-#include <pthread.h>
-
-static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER;
-
-#define MALLOC_PREACTION pthread_mutex_lock(&mALLOC_MUTEx)
-#define MALLOC_POSTACTION pthread_mutex_unlock(&mALLOC_MUTEx)
-
-#endif /* WIN32 */
-
-#else
-
-/* Substitute anything you like for these */
-
-#define MALLOC_PREACTION (0)
-#define MALLOC_POSTACTION (0)
-
-#endif
-
-Void_t *public_mALLOc(size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = mALLOc(bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-void public_fREe(Void_t *m) {
-    if (MALLOC_PREACTION != 0) {
-        return;
-    }
-    fREe(m);
-    if (MALLOC_POSTACTION != 0) {
-    }
-}
-
-Void_t *public_rEALLOc(Void_t *m, size_t bytes) {
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = rEALLOc(m, bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_mEMALIGn(size_t alignment, size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = mEMALIGn(alignment, bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_vALLOc(size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = vALLOc(bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_pVALLOc(size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = pVALLOc(bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_cALLOc(size_t n, size_t elem_size) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = cALLOc(n, elem_size);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t **public_iCALLOc(size_t n, size_t elem_size, Void_t **chunks) {
-    Void_t **m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = iCALLOc(n, elem_size, chunks);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t **public_iCOMALLOc(size_t n, size_t sizes[], Void_t **chunks) {
-    Void_t **m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = iCOMALLOc(n, sizes, chunks);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-void public_cFREe(Void_t *m) {
-    if (MALLOC_PREACTION != 0) {
-        return;
-    }
-    cFREe(m);
-    if (MALLOC_POSTACTION != 0) {
-    }
-}
-
-int public_mTRIm(size_t s) {
-    int result;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    result = mTRIm(s);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return result;
-}
-
-size_t public_mUSABLe(Void_t *m) {
-    size_t result;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    result = mUSABLe(m);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return result;
-}
-
-void public_mSTATs() {
-    if (MALLOC_PREACTION != 0) {
-        return;
-    }
-    mSTATs();
-    if (MALLOC_POSTACTION != 0) {
-    }
-}
-
-struct mallinfo public_mALLINFo() {
-    struct mallinfo m;
-    if (MALLOC_PREACTION != 0) {
-        struct mallinfo nm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-        return nm;
-    }
-    m = mALLINFo();
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-int public_mALLOPt(int p, int v) {
-    int result;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    result = mALLOPt(p, v);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return result;
-}
-
-#endif
-
-/* ------------- Optional versions of memcopy ---------------- */
-
-#if USE_MEMCPY
-
-/*
-  Note: memcpy is ONLY invoked with non-overlapping regions,
-  so the (usually slower) memmove is not needed.
-*/
-
-#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes)
-#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes)
-
-#else /* !USE_MEMCPY */
-
-/* Use Duff's device for good zeroing/copying performance. */
-
-#define MALLOC_ZERO(charp, nbytes) \
-    do { \
-        INTERNAL_SIZE_T *mzp = (INTERNAL_SIZE_T *)(charp); \
-        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
-        long mcn; \
-        if (mctmp < 8) \
-            mcn = 0; \
-        else { \
-            mcn = (mctmp - 1) / 8; \
-            mctmp %= 8; \
-        } \
-        switch (mctmp) { \
-        case 0: \
-            for (;;) { \
-                *mzp++ = 0; \
-            case 7: \
-                *mzp++ = 0; \
-            case 6: \
-                *mzp++ = 0; \
-            case 5: \
-                *mzp++ = 0; \
-            case 4: \
-                *mzp++ = 0; \
-            case 3: \
-                *mzp++ = 0; \
-            case 2: \
-                *mzp++ = 0; \
-            case 1: \
-                *mzp++ = 0; \
-                if (mcn <= 0) \
-                    break; \
-                mcn--; \
-            } \
-        } \
-    } while (0)
-
-#define MALLOC_COPY(dest, src, nbytes) \
-    do { \
-        INTERNAL_SIZE_T *mcsrc = (INTERNAL_SIZE_T *)src; \
-        INTERNAL_SIZE_T *mcdst = (INTERNAL_SIZE_T *)dest; \
-        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
-        long mcn; \
-        if (mctmp < 8) \
-            mcn = 0; \
-        else { \
-            mcn = (mctmp - 1) / 8; \
-            mctmp %= 8; \
-        } \
-        switch (mctmp) { \
-        case 0: \
-            for (;;) { \
-                *mcdst++ = *mcsrc++; \
-            case 7: \
-                *mcdst++ = *mcsrc++; \
-            case 6: \
-                *mcdst++ = *mcsrc++; \
-            case 5: \
-                *mcdst++ = *mcsrc++; \
-            case 4: \
-                *mcdst++ = *mcsrc++; \
-            case 3: \
-                *mcdst++ = *mcsrc++; \
-            case 2: \
-                *mcdst++ = *mcsrc++; \
-            case 1: \
-                *mcdst++ = *mcsrc++; \
-                if (mcn <= 0) \
-                    break; \
-                mcn--; \
-            } \
-        } \
-    } while (0)
-
-#endif
-
-/* ------------------ MMAP support ------------------ */
-
-#if HAVE_MMAP
-
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif
-
-#ifndef LACKS_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-/*
-  Nearly all versions of mmap support MAP_ANONYMOUS,
-  so the following is unlikely to be needed, but is
-  supplied just in case.
-*/
-
-#ifndef MAP_ANONYMOUS
-
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-
-#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? (dev_zero_fd = open("/dev/zero", O_RDWR), \
-                                                            mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) \
-                                                         : mmap((addr), (size), (prot), (flags), dev_zero_fd, 0))
-
-#else
-
-#define MMAP(addr, size, prot, flags) \
-    (mmap((addr), (size), (prot), (flags) | MAP_ANONYMOUS, -1, 0))
-
-#endif
-
-#endif /* HAVE_MMAP */
-
-/*
-  ----------------------- Chunk representations -----------------------
-*/
-
-/*
-  This struct declaration is misleading (but accurate and necessary).
-  It declares a "view" into memory allowing access to necessary
-  fields at known offsets from a given base. See explanation below.
-*/
-
-struct malloc_chunk {
-
-    INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */
-    INTERNAL_SIZE_T size;      /* Size in bytes, including overhead. */
-
-    struct malloc_chunk *fd; /* double links -- used only if free. */
-    struct malloc_chunk *bk;
-};
-
-typedef struct malloc_chunk *mchunkptr;
-
-/*
-  malloc_chunk details:
-
-  (The following includes lightly edited explanations by Colin Plumb.)
-
-  Chunks of memory are maintained using a `boundary tag' method as
-  described in e.g., Knuth or Standish. (See the paper by Paul
-  Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
-  survey of such techniques.) Sizes of free chunks are stored both
-  in the front of each chunk and at the end. This makes
-  consolidating fragmented chunks into bigger chunks very fast.
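-
-  (A sketch of why consolidation is fast: from any chunk p, both
-  physical neighbors are reachable in O(1), with no searching --
-
-      next = (mchunkptr)((char *)p + chunksize(p));
-      prev = (mchunkptr)((char *)p - p->prev_size);
-
-  where the second line is legal only when the previous chunk is
-  free, which is exactly the case in which merging is possible.)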
-  The size fields also hold bits representing whether chunks are free
-  or in use.
-
-  An allocated chunk looks like this:
-
-
-  chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of previous chunk, if allocated                      | |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of chunk, in bytes                                   |P|
-    mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | User data starts here...                                    .
-          .                                                             .
-          .             (malloc_usable_space() bytes)                   .
-          .                                                             |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of chunk                                               |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-  Where "chunk" is the front of the chunk for the purpose of most of
-  the malloc code, but "mem" is the pointer that is returned to the
-  user. "Nextchunk" is the beginning of the next contiguous chunk.
-
-  Chunks always begin on even word boundaries, so the mem portion
-  (which is returned to the user) is also on an even word boundary, and
-  thus at least double-word aligned.
-
-  Free chunks are stored in circular doubly-linked lists, and look like this:
-
-  chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of previous chunk                                      |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  `head:' | Size of chunk, in bytes                                   |P|
-    mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Forward pointer to next chunk in list                       |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Back pointer to previous chunk in list                      |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Unused space (may be 0 bytes long)                          .
-          .                                                             .
-          .                                                             |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  `foot:' | Size of chunk, in bytes                                     |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-  The P (PREV_INUSE) bit, stored in the unused low-order bit of the
-  chunk size (which is always a multiple of two words), is an in-use
-  bit for the *previous* chunk. If that bit is *clear*, then the
-  word before the current chunk size contains the previous chunk
-  size, and can be used to find the front of the previous chunk.
-  The very first chunk allocated always has this bit set,
-  preventing access to non-existent (or non-owned) memory. If
-  prev_inuse is set for any given chunk, then you CANNOT determine
-  the size of the previous chunk, and might even get a memory
-  addressing fault when trying to do so.
-
-  Note that the `foot' of the current chunk is actually represented
-  as the prev_size of the NEXT chunk. This makes it easier to
-  deal with alignments etc but can be very confusing when trying
-  to extend or adapt this code.
-
-  The two exceptions to all this are
-
-  1. The special chunk `top' doesn't bother using the
-     trailing size field since there is no next contiguous chunk
-     that would have to index off it. After initialization, `top'
-     is forced to always exist. If it would become less than
-     MINSIZE bytes long, it is replenished.
-
-  2. Chunks allocated via mmap, which have the second-lowest-order
-     bit (IS_MMAPPED) set in their size fields. Because they are
-     allocated one-by-one, each must contain its own trailing size field.
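-
-  As a concrete sketch (assuming the default INTERNAL_SIZE_T of
-  size_t): given a pointer mem returned by malloc, the size word of
-  its chunk sits immediately below it,
-
-      size_t sz = ((size_t *)mem)[-1];
-      int prev_in_use = sz & 0x1;
-      int is_mmapped = sz & 0x2;
-      sz &= ~(size_t)0x3;
-
-  which is exactly what the mem2chunk, prev_inuse, chunk_is_mmapped
-  and chunksize macros below encapsulate.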
-
-*/
-
-/*
-  ---------- Size and alignment checks and conversions ----------
-*/
-
-/* conversion from malloc headers to user pointers, and back */
-
-#define chunk2mem(p) ((Void_t *)((char *)(p) + 2 * SIZE_SZ))
-#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - 2 * SIZE_SZ))
-
-/* The smallest possible chunk */
-#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk))
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-
-#define MINSIZE \
-    (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
-
-/* Check if m has acceptable alignment */
-
-#define aligned_OK(m) (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0)
-
-/*
-  Check if a request is so large that it would wrap around zero when
-  padded and aligned. To simplify some other code, the bound is made
-  low enough so that adding MINSIZE will also not wrap around zero.
-*/
-
-#define REQUEST_OUT_OF_RANGE(req) \
-    ((CHUNK_SIZE_T)(req) >=       \
-     (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE))
-
-/* pad request bytes into a usable size -- internal version */
-
-#define request2size(req) \
-    (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
-
-/* Same, except also perform argument check */
-
-#define checked_request2size(req, sz) \
-    if (REQUEST_OUT_OF_RANGE(req)) {  \
-        MALLOC_FAILURE_ACTION;        \
-        return 0;                     \
-    }                                 \
-    (sz) = request2size(req);
-
-/*
-  --------------- Physical chunk operations ---------------
-*/
-
-/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
-#define PREV_INUSE 0x1
-
-/* extract inuse bit of previous chunk */
-#define prev_inuse(p) ((p)->size & PREV_INUSE)
-
-/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
-#define IS_MMAPPED 0x2
-
-/* check for mmap()'ed chunk */
-#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
-
-/*
-  Bits to mask off when extracting size
-
-  Note: IS_MMAPPED is intentionally not masked off from size field in
-  macros for which mmapped chunks should never be seen. This should
-  cause helpful core dumps to occur if it is tried by accident by
-  people extending or adapting this malloc.
-*/
-#define SIZE_BITS (PREV_INUSE | IS_MMAPPED)
-
-/* Get size, ignoring use bits */
-#define chunksize(p) ((p)->size & ~(SIZE_BITS))
-
-/* Ptr to next physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))
-
-/* Ptr to previous physical malloc_chunk */
-#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_size)))
-
-/* Treat space at ptr + offset as a chunk */
-#define chunk_at_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
-
-/* extract p's inuse bit */
-#define inuse(p) \
-    ((((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)
-
-/* set/clear chunk as being inuse without otherwise disturbing */
-#define set_inuse(p) \
-    ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE
-
-#define clear_inuse(p) \
-    ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)
-
-/* check/set/clear inuse bits in known places */
-#define inuse_bit_at_offset(p, s) \
-    (((mchunkptr)(((char *)(p)) + (s)))->size & PREV_INUSE)
-
-#define set_inuse_bit_at_offset(p, s) \
-    (((mchunkptr)(((char *)(p)) + (s)))->size |= PREV_INUSE)
-
-#define clear_inuse_bit_at_offset(p, s) \
-    (((mchunkptr)(((char *)(p)) + (s)))->size &= ~(PREV_INUSE))
-
-/* Set size at head, without disturbing its use bit */
-#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s)))
-
-/* Set size/use field */
-#define set_head(p, s) ((p)->size = (s))
-
-/* Set size at footer (only when chunk is not in use) */
-#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_size = (s))
-
-/*
-  -------------------- Internal data structures --------------------
-
-  All internal state is held in an instance of malloc_state defined
-  below. There are no other static variables, except in two optional
-  cases:
-  * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
-  * If HAVE_MMAP is true, but mmap doesn't support
-    MAP_ANONYMOUS, a dummy file descriptor for mmap.
-
-  Beware of lots of tricks that minimize the total bookkeeping space
-  requirements. The result is a little over 1K bytes (for 4byte
-  pointers and size_t.)
-*/
-
-/*
-  Bins
-
-  An array of bin headers for free chunks. Each bin is doubly
-  linked. The bins are approximately proportionally (log) spaced.
-  There are a lot of these bins (96). This may look excessive, but
-  works very well in practice. Most bins hold sizes that are
-  unusual as malloc request sizes, but are more usual for fragments
-  and consolidated sets of chunks, which is what these bins hold, so
-  they can be found quickly. All procedures maintain the invariant
-  that no consolidated chunk physically borders another one, so each
-  chunk in a list is known to be preceded and followed by either
-  inuse chunks or the ends of memory.
-
-  Chunks in bins are kept in size order, with ties going to the
-  approximately least recently used chunk. Ordering isn't needed
-  for the small bins, which all contain the same-sized chunks, but
-  facilitates best-fit allocation for larger chunks. These lists
-  are just sequential. Keeping them in order almost never requires
-  enough traversal to warrant using fancier ordered data
-  structures.
-
-  Chunks of the same size are linked with the most
-  recently freed at the front, and allocations are taken from the
-  back. This results in LRU (FIFO) allocation order, which tends
-  to give each chunk an equal opportunity to be consolidated with
-  adjacent freed chunks, resulting in larger free chunks and less
-  fragmentation.
-
-  To simplify use in double-linked lists, each bin header acts
-  as a malloc_chunk. This avoids special-casing for headers.
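-
-  (For instance, walking a bin b from least to most recently freed is
-  plain list traversal with the header itself as the sentinel,
-
-      for (p = last(b); p != b; p = p->bk) { ... }
-
-  just as the debugging code later in this file does.)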
-  But to conserve space and improve locality, we allocate
-  only the fd/bk pointers of bins, and then use repositioning tricks
-  to treat these as the fields of a malloc_chunk*.
-*/
-
-typedef struct malloc_chunk *mbinptr;
-
-/* addressing -- note that bin_at(0) does not exist */
-#define bin_at(m, i) ((mbinptr)((char *)&((m)->bins[(i) << 1]) - (SIZE_SZ << 1)))
-
-/* analog of ++bin */
-#define next_bin(b) ((mbinptr)((char *)(b) + (sizeof(mchunkptr) << 1)))
-
-/* Reminders about list directionality within bins */
-#define first(b) ((b)->fd)
-#define last(b) ((b)->bk)
-
-/* Take a chunk off a bin list */
-#define unlink(P, BK, FD) \
-    {                     \
-        FD = P->fd;       \
-        BK = P->bk;       \
-        FD->bk = BK;      \
-        BK->fd = FD;      \
-    }
-
-/*
-  Indexing
-
-  Bins for sizes < 256 bytes contain chunks of all the same size, spaced
-  8 bytes apart. Larger bins are approximately logarithmically spaced,
-  four bins per power of two, each covering a quarter of that range:
-
-  32 bins of size 8
-   4 bins of size 64
-   4 bins of size 128
-   ...
-   4 bins of size 2097152
-   1 bin of size what's left
-
-  The bins top out around 16MB because we expect to
-  service larger requests via mmap.
-*/
-
-#define NBINS 96
-#define NSMALLBINS 32
-#define SMALLBIN_WIDTH 8
-#define MIN_LARGE_SIZE 256
-
-#define in_smallbin_range(sz) \
-    ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE)
-
-#define smallbin_index(sz) (((unsigned)(sz)) >> 3)
-
-/*
-  Compute index for size. We expect this to be inlined when
-  compiled with optimization, else not, which works out well.
-*/
-static int largebin_index(unsigned int sz) {
-    unsigned int x = sz >> SMALLBIN_WIDTH;
-    unsigned int m; /* bit position of highest set bit of x */
-
-    if (x >= 0x10000) {
-        return NBINS - 1;
-    }
-
-    /* On Intel, use BSRL instruction to find highest bit */
-#if defined(__GNUC__) && defined(i386)
-
-    __asm__("bsrl %1,%0\n\t"
-            : "=r"(m)
-            : "g"(x));
-
-#else
-    {
-        /*
-          Based on branch-free nlz algorithm in chapter 5 of Henry
-          S. Warren Jr's book "Hacker's Delight".
-        */
-
-        unsigned int n = ((x - 0x100) >> 16) & 8;
-        x <<= n;
-        m = ((x - 0x1000) >> 16) & 4;
-        n += m;
-        x <<= m;
-        m = ((x - 0x4000) >> 16) & 2;
-        n += m;
-        x = (x << m) >> 14;
-        m = 13 - n + (x & ~(x >> 1));
-    }
-#endif
-
-    /* Use next 2 bits to create finer-granularity bins */
-    return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3);
-}
-
-#define bin_index(sz) \
-    ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
-
-/*
-  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
-  first bin that is maintained in sorted order. This must
-  be the smallest size corresponding to a given bin.
-
-  Normally, this should be MIN_LARGE_SIZE. But you can weaken
-  best fit guarantees to sometimes speed up malloc by increasing value.
-  Doing this means that malloc may choose a chunk that is
-  non-best-fitting by up to the width of the bin.
-
-  Some useful cutoff values:
-      512 - all bins sorted
-     2560 - leaves bins <= 64 bytes wide unsorted
-    12288 - leaves bins <= 512 bytes wide unsorted
-    65536 - leaves bins <= 4096 bytes wide unsorted
-   262144 - leaves bins <= 32768 bytes wide unsorted
-       -1 - no bins sorted (not recommended!)
-*/
-
-#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
-/* #define FIRST_SORTED_BIN_SIZE 65536 */
-
-/*
-  Unsorted chunks
-
-  All remainders from chunk splits, as well as all returned chunks,
-  are first placed in the "unsorted" bin. They are then placed
-  in regular bins after malloc gives them ONE chance to be used before
-  binning.
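-
-  (For example, if a program frees a chunk and the very next malloc
-  requests exactly that size, the chunk is handed back straight off
-  this list; only a chunk that fails its one chance gets filed into
-  the regular bin for its size.)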
-  So, basically, the unsorted_chunks list acts as a queue,
-  with chunks being placed on it in free (and malloc_consolidate),
-  and taken off (to be either used or placed in bins) in malloc.
-*/
-
-/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
-#define unsorted_chunks(M) (bin_at(M, 1))
-
-/*
-  Top
-
-  The top-most available chunk (i.e., the one bordering the end of
-  available memory) is treated specially. It is never included in
-  any bin, is used only if no other chunk is available, and is
-  released back to the system if it is very large (see
-  M_TRIM_THRESHOLD). Because top initially
-  points to its own bin with initial zero size, thus forcing
-  extension on the first malloc request, we avoid having any special
-  code in malloc to check whether it even exists yet. But we still
-  need to do so when getting memory from system, so we make
-  initial_top treat the bin as a legal but unusable chunk during the
-  interval between initialization and the first call to
-  sYSMALLOc. (This is somewhat delicate, since it relies on
-  the 2 preceding words to be zero during this interval as well.)
-*/
-
-/* Conveniently, the unsorted bin can be used as dummy top on first call */
-#define initial_top(M) (unsorted_chunks(M))
-
-/*
-  Binmap
-
-  To help compensate for the large number of bins, a one-level index
-  structure is used for bin-by-bin searching. `binmap' is a
-  bitvector recording whether bins are definitely empty so they can
-  be skipped over during traversals. The bits are NOT always
-  cleared as soon as bins are empty, but instead only
-  when they are noticed to be empty during traversal in malloc.
-*/
-
-/* Conservatively use 32 bits per map word, even if on 64bit system */
-#define BINMAPSHIFT 5
-#define BITSPERMAP (1U << BINMAPSHIFT)
-#define BINMAPSIZE (NBINS / BITSPERMAP)
-
-#define idx2block(i) ((i) >> BINMAPSHIFT)
-#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
-
-#define mark_bin(m, i) ((m)->binmap[idx2block(i)] |= idx2bit(i))
-#define unmark_bin(m, i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
-#define get_binmap(m, i) ((m)->binmap[idx2block(i)] & idx2bit(i))
-
-/*
-  Fastbins
-
-  An array of lists holding recently freed small chunks. Fastbins
-  are not doubly linked. It is faster to single-link them, and
-  since chunks are never removed from the middles of these lists,
-  double linking is not necessary. Also, unlike regular bins, they
-  are not even processed in FIFO order (they use faster LIFO) since
-  ordering doesn't much matter in the transient contexts in which
-  fastbins are normally used.
-
-  Chunks in fastbins keep their inuse bit set, so they cannot
-  be consolidated with other free chunks. malloc_consolidate
-  releases all chunks in fastbins and consolidates them with
-  other free chunks.
-*/
-
-typedef struct malloc_chunk *mfastbinptr;
-
-/* offset 2 to use otherwise unindexable first 2 bins */
-#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2)
-
-/* The maximum fastbin request size we support */
-#define MAX_FAST_SIZE 80
-
-#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE)) + 1)
-
-/*
-  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
-  that triggers automatic consolidation of possibly-surrounding
-  fastbin chunks. This is a heuristic, so the exact value should not
-  matter too much. It is defined at half the default trim threshold as a
-  compromise heuristic to only attempt consolidation if it is likely
-  to lead to trimming.
-  However, it is not dynamically tunable, since
-  consolidation reduces fragmentation surrounding large chunks even
-  if trimming is not used.
-*/
-
-#define FASTBIN_CONSOLIDATION_THRESHOLD \
-    ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
-
-/*
-  Since the lowest 2 bits in max_fast don't matter in size comparisons,
-  they are used as flags.
-*/
-
-/*
-  ANYCHUNKS_BIT held in max_fast indicates that there may be any
-  freed chunks at all. It is set true when entering a chunk into any
-  bin.
-*/
-
-#define ANYCHUNKS_BIT (1U)
-
-#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT))
-#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT)
-#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT)
-
-/*
-  FASTCHUNKS_BIT held in max_fast indicates that there are probably
-  some fastbin chunks. It is set true on entering a chunk into any
-  fastbin, and cleared only in malloc_consolidate.
-*/
-
-#define FASTCHUNKS_BIT (2U)
-
-#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT))
-#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT | ANYCHUNKS_BIT))
-#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT))
-
-/*
-  Set value of max_fast.
-  Use impossibly small value if 0.
-*/
-
-#define set_max_fast(M, s)                                            \
-    (M)->max_fast = (((s) == 0) ? SMALLBIN_WIDTH : request2size(s)) | \
-                    ((M)->max_fast & (FASTCHUNKS_BIT | ANYCHUNKS_BIT))
-
-#define get_max_fast(M) \
-    ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
-
-/*
-  morecore_properties is a status word holding dynamically discovered
-  or controlled properties of the morecore function
-*/
-
-#define MORECORE_CONTIGUOUS_BIT (1U)
-
-#define contiguous(M) \
-    (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT))
-#define noncontiguous(M) \
-    (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0)
-#define set_contiguous(M) \
-    ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT)
-#define set_noncontiguous(M) \
-    ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)
-
-/*
-  ----------- Internal state representation and initialization -----------
-*/
-
-struct malloc_state {
-
-    /* The maximum chunk size to be eligible for fastbin */
-    INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */
-
-    /* Fastbins */
-    mfastbinptr fastbins[NFASTBINS];
-
-    /* Base of the topmost chunk -- not otherwise kept in a bin */
-    mchunkptr top;
-
-    /* The remainder from the most recent split of a small request */
-    mchunkptr last_remainder;
-
-    /* Normal bins packed as described above */
-    mchunkptr bins[NBINS * 2];
-
-    /* Bitmap of bins. Trailing zero map handles cases of largest binned size */
-    unsigned int binmap[BINMAPSIZE + 1];
-
-    /* Tunable parameters */
-    CHUNK_SIZE_T trim_threshold;
-    INTERNAL_SIZE_T top_pad;
-    INTERNAL_SIZE_T mmap_threshold;
-
-    /* Memory map support */
-    int n_mmaps;
-    int n_mmaps_max;
-    int max_n_mmaps;
-
-    /* Cache malloc_getpagesize */
-    unsigned int pagesize;
-
-    /* Track properties of MORECORE */
-    unsigned int morecore_properties;
-
-    /* Statistics */
-    INTERNAL_SIZE_T mmapped_mem;
-    INTERNAL_SIZE_T sbrked_mem;
-    INTERNAL_SIZE_T max_sbrked_mem;
-    INTERNAL_SIZE_T max_mmapped_mem;
-    INTERNAL_SIZE_T max_total_mem;
-};
-
-typedef struct malloc_state *mstate;
-
-/*
-  There is exactly one instance of this struct in this malloc.
-  If you are adapting this malloc in a way that does NOT use a static
-  malloc_state, you MUST explicitly zero-fill it before using. This
-  malloc relies on the property that malloc_state is initialized to
-  all zeroes (as is true of C statics).
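-
-  A minimal sketch of such an adaptation (my_alloc_state_somehow is a
-  placeholder for however the adapted code obtains the struct):
-
-      struct malloc_state *st = my_alloc_state_somehow();
-      memset(st, 0, sizeof(struct malloc_state));
-
-  The zero-fill must happen before any malloc or free traffic touches
-  the state.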
-*/
-
-static struct malloc_state av_; /* never directly referenced */
-
-/*
-  All uses of av_ are via get_malloc_state().
-  At most one "call" to get_malloc_state is made per invocation of
-  the public versions of malloc and free, but other routines
-  that in turn invoke malloc and/or free may call more than once.
-  Also, it is called in check* routines if DL_DEBUG is set.
-*/
-
-#define get_malloc_state() (&(av_))
-
-/*
-  Initialize a malloc_state struct.
-
-  This is called only from within malloc_consolidate, which needs
-  to be called in the same contexts anyway. It is never called directly
-  outside of malloc_consolidate because some optimizing compilers try
-  to inline it at all call points, which turns out not to be an
-  optimization at all. (Inlining it in malloc_consolidate is fine though.)
-*/
-
-#if __STD_C
-static void malloc_init_state(mstate av)
-#else
-static void malloc_init_state(av) mstate av;
-#endif
-{
-    int i;
-    mbinptr bin;
-
-    /* Establish circular links for normal bins */
-    for (i = 1; i < NBINS; ++i) {
-        bin = bin_at(av, i);
-        bin->fd = bin->bk = bin;
-    }
-
-    av->top_pad = DEFAULT_TOP_PAD;
-    av->n_mmaps_max = DEFAULT_MMAP_MAX;
-    av->mmap_threshold = DEFAULT_MMAP_THRESHOLD;
-    av->trim_threshold = DEFAULT_TRIM_THRESHOLD;
-
-#if MORECORE_CONTIGUOUS
-    set_contiguous(av);
-#else
-    set_noncontiguous(av);
-#endif
-
-    set_max_fast(av, DEFAULT_MXFAST);
-
-    av->top = initial_top(av);
-    av->pagesize = malloc_getpagesize;
-}
-
-/*
-  Other internal utilities operating on mstates
-*/
-
-#if __STD_C
-static Void_t *sYSMALLOc(INTERNAL_SIZE_T, mstate);
-static int sYSTRIm(size_t, mstate);
-static void malloc_consolidate(mstate);
-static Void_t **iALLOc(size_t, size_t *, int, Void_t **);
-#else
-static Void_t *sYSMALLOc();
-static int sYSTRIm();
-static void malloc_consolidate();
-static Void_t **iALLOc();
-#endif
-
-/*
-  Debugging support
-
-  These routines make a number of assertions about the states
-  of data structures that should be true at all times. If any
-  are not true, it's very likely that a user program has somehow
-  trashed memory. (It's also possible that there is a coding error
-  in malloc. In which case, please report it!)
-*/
-
-#if !DL_DEBUG
-
-#define check_chunk(P)
-#define check_free_chunk(P)
-#define check_inuse_chunk(P)
-#define check_remalloced_chunk(P, N)
-#define check_malloced_chunk(P, N)
-#define check_malloc_state()
-
-#else
-#define check_chunk(P) do_check_chunk(P)
-#define check_free_chunk(P) do_check_free_chunk(P)
-#define check_inuse_chunk(P) do_check_inuse_chunk(P)
-#define check_remalloced_chunk(P, N) do_check_remalloced_chunk(P, N)
-#define check_malloced_chunk(P, N) do_check_malloced_chunk(P, N)
-#define check_malloc_state() do_check_malloc_state()
-
-/*
-  Properties of all chunks
-*/
-
-#if __STD_C
-static void do_check_chunk(mchunkptr p)
-#else
-static void do_check_chunk(p) mchunkptr p;
-#endif
-{
-    mstate av = get_malloc_state();
-    CHUNK_SIZE_T sz = chunksize(p);
-    /* min and max possible addresses assuming contiguous allocation */
-    char *max_address = (char *)(av->top) + chunksize(av->top);
-    char *min_address = max_address - av->sbrked_mem;
-
-    if (!chunk_is_mmapped(p)) {
-
-        /* Has legal address ... */
-        if (p != av->top) {
-            if (contiguous(av)) {
-                assert(((char *)p) >= min_address);
-                assert(((char *)p + sz) <= ((char *)(av->top)));
-            }
-        } else {
-            /* top size is always at least MINSIZE */
-            assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
-            /* top predecessor always marked inuse */
-            assert(prev_inuse(p));
-        }
-    } else {
-#if HAVE_MMAP
-        /* address is outside main heap */
-        if (contiguous(av) && av->top != initial_top(av)) {
-            assert(((char *)p) < min_address || ((char *)p) > max_address);
-        }
-        /* chunk is page-aligned */
-        assert(((p->prev_size + sz) & (av->pagesize - 1)) == 0);
-        /* mem is aligned */
-        assert(aligned_OK(chunk2mem(p)));
-#else
-        /* force an appropriate assert violation if debug set */
-        assert(!chunk_is_mmapped(p));
-#endif
-    }
-}
-
-/*
-  Properties of free chunks
-*/
-
-#if __STD_C
-static void do_check_free_chunk(mchunkptr p)
-#else
-static void do_check_free_chunk(p) mchunkptr p;
-#endif
-{
-    mstate av = get_malloc_state();
-
-    INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
-    mchunkptr next = chunk_at_offset(p, sz);
-
-    do_check_chunk(p);
-
-    /* Chunk must claim to be free ... */
-    assert(!inuse(p));
-    assert(!chunk_is_mmapped(p));
-
-    /* Unless a special marker, must have OK fields */
-    if ((CHUNK_SIZE_T)(sz) >= MINSIZE) {
-        assert((sz & MALLOC_ALIGN_MASK) == 0);
-        assert(aligned_OK(chunk2mem(p)));
-        /* ... matching footer field */
-        assert(next->prev_size == sz);
-        /* ... and is fully consolidated */
-        assert(prev_inuse(p));
-        assert(next == av->top || inuse(next));
-
-        /* ... and has minimally sane links */
-        assert(p->fd->bk == p);
-        assert(p->bk->fd == p);
-    } else { /* markers are always of size SIZE_SZ */
-        assert(sz == SIZE_SZ);
-    }
-}
-
-/*
-  Properties of inuse chunks
-*/
-
-#if __STD_C
-static void do_check_inuse_chunk(mchunkptr p)
-#else
-static void do_check_inuse_chunk(p) mchunkptr p;
-#endif
-{
-    mstate av = get_malloc_state();
-    mchunkptr next;
-    do_check_chunk(p);
-
-    if (chunk_is_mmapped(p)) {
-        return; /* mmapped chunks have no next/prev */
-    }
-
-    /* Check whether it claims to be in use ... */
-    assert(inuse(p));
-
-    next = next_chunk(p);
-
-    /*
-      ... and is surrounded by OK chunks.
-      Since more things can be checked with free chunks than inuse ones,
-      if an inuse chunk borders them and debug is on, it's worth doing them.
-    */
-    if (!prev_inuse(p)) {
-        /* Note that we cannot even look at prev unless it is not inuse */
-        mchunkptr prv = prev_chunk(p);
-        assert(next_chunk(prv) == p);
-        do_check_free_chunk(prv);
-    }
-
-    if (next == av->top) {
-        assert(prev_inuse(next));
-        assert(chunksize(next) >= MINSIZE);
-    } else if (!inuse(next)) {
-        do_check_free_chunk(next);
-    }
-}
-
-/*
-  Properties of chunks recycled from fastbins
-*/
-
-#if __STD_C
-static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
-#else
-static void do_check_remalloced_chunk(p, s) mchunkptr p;
-INTERNAL_SIZE_T s;
-#endif
-{
-    INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
-
-    do_check_inuse_chunk(p);
-
-    /* Legal size ... */
-    assert((sz & MALLOC_ALIGN_MASK) == 0);
-    assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
-    /* ... and alignment */
-    assert(aligned_OK(chunk2mem(p)));
-    /* chunk is less than MINSIZE more than request */
-    assert((long)(sz) - (long)(s) >= 0);
-    assert((long)(sz) - (long)(s + MINSIZE) < 0);
-}
-
-/*
-  Properties of nonrecycled chunks at the point they are malloced
-*/
-
-#if __STD_C
-static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
-#else
-static void do_check_malloced_chunk(p, s) mchunkptr p;
-INTERNAL_SIZE_T s;
-#endif
-{
-    /* same as recycled case ... */
-    do_check_remalloced_chunk(p, s);
-
-    /*
-      ... plus, must obey implementation invariant that prev_inuse is
-      always true of any allocated chunk; i.e., that each allocated
-      chunk borders either a previously allocated and still in-use
-      chunk, or the base of its memory arena. This is ensured
-      by making all allocations from the `lowest' part of any found
-      chunk. This does not necessarily hold however for chunks
-      recycled via fastbins.
-    */
-
-    assert(prev_inuse(p));
-}
-
-/*
-  Properties of malloc_state.
-
-  This may be useful for debugging malloc, as well as detecting user
-  programmer errors that somehow write into malloc_state.
-
-  If you are extending or experimenting with this malloc, you can
-  probably figure out how to hack this routine to print out or
-  display chunk addresses, sizes, bins, and other instrumentation.
-*/
-
-static void do_check_malloc_state(void) {
-    mstate av = get_malloc_state();
-    int i;
-    mchunkptr p;
-    mchunkptr q;
-    mbinptr b;
-    unsigned int binbit;
-    int empty;
-    unsigned int idx;
-    INTERNAL_SIZE_T size;
-    CHUNK_SIZE_T total = 0;
-    int max_fast_bin;
-
-    /* internal size_t must be no wider than pointer type */
-    assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char *));
-
-    /* alignment is a power of 2 */
-    assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
-
-    /* cannot run remaining checks until fully initialized */
-    if (av->top == 0 || av->top == initial_top(av)) {
-        return;
-    }
-
-    /* pagesize is a power of 2 */
-    assert((av->pagesize & (av->pagesize - 1)) == 0);
-
-    /* properties of fastbins */
-
-    /* max_fast is in allowed range */
-    assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE));
-
-    max_fast_bin = fastbin_index(av->max_fast);
-
-    for (i = 0; i < NFASTBINS; ++i) {
-        p = av->fastbins[i];
-
-        /* all bins past max_fast are empty */
-        if (i > max_fast_bin) {
-            assert(p == 0);
-        }
-
-        while (p != 0) {
-            /* each chunk claims to be inuse */
-            do_check_inuse_chunk(p);
-            total += chunksize(p);
-            /* chunk belongs in this bin */
-            assert(fastbin_index(chunksize(p)) == i);
-            p = p->fd;
-        }
-    }
-
-    if (total != 0) {
-        assert(have_fastchunks(av));
-    } else if (!have_fastchunks(av)) {
-        assert(total == 0);
-    }
-
-    /* check normal bins */
-    for (i = 1; i < NBINS; ++i) {
-        b = bin_at(av, i);
-
-        /* binmap is accurate (except for bin 1 == unsorted_chunks) */
-        if (i >= 2) {
-            binbit = get_binmap(av, i);
-            empty = last(b) == b;
-            if (!binbit) {
-                assert(empty);
-            } else if (!empty) {
-                assert(binbit);
-            }
-        }
-
-        for (p = last(b); p != b; p = p->bk) {
-            /* each chunk claims to be free */
-            do_check_free_chunk(p);
-            size = chunksize(p);
-            total += size;
-            if (i >= 2) {
-                /* chunk belongs in bin */
-                idx = bin_index(size);
-                assert(idx == i);
-                /* lists are sorted */
-                if ((CHUNK_SIZE_T)size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
-                    assert(p->bk == b ||
-                           (CHUNK_SIZE_T)chunksize(p->bk) >=
-                               (CHUNK_SIZE_T)chunksize(p));
-                }
-            }
-            /* chunk is followed by a legal chain of inuse chunks */
-            for (q = next_chunk(p);
-                 (q != av->top && inuse(q) &&
-                  (CHUNK_SIZE_T)(chunksize(q)) >=
-                      MINSIZE);
-                 q = next_chunk(q)) {
-                do_check_inuse_chunk(q);
-            }
-        }
-    }
-
-    /* top chunk is OK */
-    check_chunk(av->top);
-
-    /* sanity checks for statistics */
-
-    assert(total <= (CHUNK_SIZE_T)(av->max_total_mem));
-    assert(av->n_mmaps >= 0);
-    assert(av->n_mmaps <= av->max_n_mmaps);
-
-    assert((CHUNK_SIZE_T)(av->sbrked_mem) <=
-           (CHUNK_SIZE_T)(av->max_sbrked_mem));
-
-    assert((CHUNK_SIZE_T)(av->mmapped_mem) <=
-           (CHUNK_SIZE_T)(av->max_mmapped_mem));
-
-    assert((CHUNK_SIZE_T)(av->max_total_mem) >=
-           (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem));
-}
-#endif
-
-/* ----------- Routines dealing with system allocation -------------- */
-
-/*
-  sysmalloc handles malloc cases requiring more memory from the system.
-  On entry, it is assumed that av->top does not have enough
-  space to service request for nb bytes, thus requiring that av->top
-  be extended or replaced.
-*/
-
-#if __STD_C
-static Void_t *sYSMALLOc(INTERNAL_SIZE_T nb, mstate av)
-#else
-static Void_t *sYSMALLOc(nb, av)
-INTERNAL_SIZE_T nb;
-mstate av;
-#endif
-{
-    mchunkptr old_top;        /* incoming value of av->top */
-    INTERNAL_SIZE_T old_size; /* its size */
-    char *old_end;            /* its end address */
-
-    long size; /* arg to first MORECORE or mmap call */
-    char *brk; /* return value from MORECORE */
-
-    long correction; /* arg to 2nd MORECORE call */
-    char *snd_brk;   /* 2nd return val */
-
-    INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
-    INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
-    char *aligned_brk;              /* aligned offset into brk */
-
-    mchunkptr p;                 /* the allocated/returned chunk */
-    mchunkptr remainder;         /* remainder from allocation */
-    CHUNK_SIZE_T remainder_size; /* its size */
-
-    CHUNK_SIZE_T sum; /* for updating stats */
-
-    size_t pagemask = av->pagesize - 1;
-
-    /*
-      If there is space available in fastbins, consolidate and retry
-      malloc from scratch rather than getting memory from system. This
-      can occur only if nb is in smallbin range so we didn't consolidate
-      upon entry to malloc. It is much easier to handle this case here
-      than in malloc proper.
-    */
-
-    if (have_fastchunks(av)) {
-        assert(in_smallbin_range(nb));
-        malloc_consolidate(av);
-        return mALLOc(nb - MALLOC_ALIGN_MASK);
-    }
-
-#if HAVE_MMAP
-
-    /*
-      If have mmap, and the request size meets the mmap threshold, and
-      the system supports mmap, and there are few enough currently
-      allocated mmapped regions, try to directly map this request
-      rather than expanding top.
-    */
-
-    if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) &&
-        (av->n_mmaps < av->n_mmaps_max)) {
-
-        char *mm; /* return value from mmap call */
-
-        /*
-          Round up size to nearest page. For mmapped chunks, the overhead
-          is one SIZE_SZ unit larger than for normal chunks, because there
-          is no following chunk whose prev_size field could be used.
-        */
-        size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
-
-        /* Don't try if size wraps around 0 */
-        if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
-
-            mm = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-
-            if (mm != (char *)(MORECORE_FAILURE)) {
-
-                /*
-                  The offset to the start of the mmapped region is stored
-                  in the prev_size field of the chunk. This allows us to adjust
-                  returned start address to meet alignment requirements here
-                  and in memalign(), and still be able to compute proper
-                  address argument for later munmap in free() and realloc().
-                */
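-                /*
-                  (Worked example, assuming MALLOC_ALIGNMENT == 8: if the
-                  region started at 0x20000004, chunk2mem would land 4 bytes
-                  off alignment, so front_misalign = 4 and correction = 4;
-                  the chunk then begins at 0x20000008 with prev_size = 4
-                  recording the offset, letting free() recover 0x20000004
-                  to pass to munmap.)
-                */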
-
-                front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
-                if (front_misalign > 0) {
-                    correction = MALLOC_ALIGNMENT - front_misalign;
-                    p = (mchunkptr)(mm + correction);
-                    p->prev_size = correction;
-                    set_head(p, (size - correction) | IS_MMAPPED);
-                } else {
-                    p = (mchunkptr)mm;
-                    p->prev_size = 0;
-                    set_head(p, size | IS_MMAPPED);
-                }
-
-                /* update statistics */
-
-                if (++av->n_mmaps > av->max_n_mmaps) {
-                    av->max_n_mmaps = av->n_mmaps;
-                }
-
-                sum = av->mmapped_mem += size;
-                if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) {
-                    av->max_mmapped_mem = sum;
-                }
-                sum += av->sbrked_mem;
-                if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) {
-                    av->max_total_mem = sum;
-                }
-
-                check_chunk(p);
-
-                return chunk2mem(p);
-            }
-        }
-    }
-#endif
-
-    /* Record incoming configuration of top */
-
-    old_top = av->top;
-    old_size = chunksize(old_top);
-    old_end = (char *)(chunk_at_offset(old_top, old_size));
-
-    brk = snd_brk = (char *)(MORECORE_FAILURE);
-
-    /*
-      If not the first time through, we require old_size to be
-      at least MINSIZE and to have prev_inuse set.
-    */
-
-    assert((old_top == initial_top(av) && old_size == 0) ||
-           ((CHUNK_SIZE_T)(old_size) >= MINSIZE &&
-            prev_inuse(old_top)));
-
-    /* Precondition: not enough current space to satisfy nb request */
-    assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE));
-
-    /* Precondition: all fastbins are consolidated */
-    assert(!have_fastchunks(av));
-
-    /* Request enough space for nb + pad + overhead */
-
-    size = nb + av->top_pad + MINSIZE;
-
-    /*
-      If contiguous, we can subtract out existing space that we hope to
-      combine with new space. We add it back later only if
-      we don't actually get contiguous space.
-    */
-
-    if (contiguous(av)) {
-        size -= old_size;
-    }
-
-    /*
-      Round to a multiple of page size.
-      If MORECORE is not contiguous, this ensures that we only call it
-      with whole-page arguments. And if MORECORE is contiguous and
-      this is not first time through, this preserves page-alignment of
-      previous calls. Otherwise, we correct to page-align below.
-    */
-
-    size = (size + pagemask) & ~pagemask;
-
-    /*
-      Don't try to call MORECORE if argument is so big as to appear
-      negative. Note that since mmap takes size_t arg, it may succeed
-      below even if we cannot call MORECORE.
-    */
-
-    if (size > 0) {
-        brk = (char *)(MORECORE(size));
-    }
-
-    /*
-      If have mmap, try using it as a backup when MORECORE fails or
-      cannot be used. This is worth doing on systems that have "holes" in
-      address space, so sbrk cannot extend to give contiguous space, but
-      space is available elsewhere. Note that we ignore mmap max count
-      and threshold limits, since the space will not be used as a
-      segregated mmap region.
-    */
-
-#if HAVE_MMAP
-    if (brk == (char *)(MORECORE_FAILURE)) {
-
-        /* Cannot merge with old top, so add its size back in */
-        if (contiguous(av)) {
-            size = (size + old_size + pagemask) & ~pagemask;
-        }
-
-        /* If we are relying on mmap as backup, then use larger units */
-        if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE)) {
-            size = MMAP_AS_MORECORE_SIZE;
-        }
-
-        /* Don't try if size wraps around 0 */
-        if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
-
-            brk = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-
-            if (brk != (char *)(MORECORE_FAILURE)) {
-
-                /* We do not need, and cannot use, another sbrk call to find end */
-                snd_brk = brk + size;
-
-                /*
-                  Record that we no longer have a contiguous sbrk region.
-                  After the first time mmap is used as backup, we do not
-                  ever rely on contiguous space since this could incorrectly
-                  bridge regions.
-                */
-                set_noncontiguous(av);
-            }
-        }
-    }
-#endif
-
-    if (brk != (char *)(MORECORE_FAILURE)) {
-        av->sbrked_mem += size;
-
-        /*
-          If MORECORE extends previous space, we can likewise extend top size.
-        */
-
-        if (brk == old_end && snd_brk == (char *)(MORECORE_FAILURE)) {
-            set_head(old_top, (size + old_size) | PREV_INUSE);
-        }
-
-        /*
-          Otherwise, make adjustments:
-
-          * If the first time through or noncontiguous, we need to call sbrk
-            just to find out where the end of memory lies.
-
-          * We need to ensure that all returned chunks from malloc will meet
-            MALLOC_ALIGNMENT
-
-          * If there was an intervening foreign sbrk, we need to adjust sbrk
-            request size to account for fact that we will not be able to
-            combine new space with existing space in old_top.
-
-          * Almost all systems internally allocate whole pages at a time, in
-            which case we might as well use the whole last page of request.
-            So we allocate enough more memory to hit a page boundary now,
-            which in turn causes future contiguous calls to page-align.
-        */
-
-        else {
-            front_misalign = 0;
-            end_misalign = 0;
-            correction = 0;
-            aligned_brk = brk;
-
-            /*
-              If MORECORE returns an address lower than we have seen before,
-              we know it isn't really contiguous. This and some subsequent
-              checks help cope with non-conforming MORECORE functions and
-              the presence of "foreign" calls to MORECORE from outside of
-              malloc or by other threads. We cannot guarantee to detect
-              these in all cases, but cope with the ones we do detect.
-            */
-            if (contiguous(av) && old_size != 0 && brk < old_end) {
-                set_noncontiguous(av);
-            }
-
-            /* handle contiguous cases */
-            if (contiguous(av)) {
-
-                /*
-                  We can tolerate forward non-contiguities here (usually due
-                  to foreign calls) but treat them as part of our space for
-                  stats reporting.
-                */
-                if (old_size != 0) {
-                    av->sbrked_mem += brk - old_end;
-                }
-
-                /* Guarantee alignment of first new chunk made from this space */
-
-                front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
-                if (front_misalign > 0) {
-
-                    /*
-                      Skip over some bytes to arrive at an aligned position.
-                      We don't need to specially mark these wasted front bytes.
-                      They will never be accessed anyway because
-                      prev_inuse of av->top (and any chunk created from its start)
-                      is always true after initialization.
-                    */
-
-                    correction = MALLOC_ALIGNMENT - front_misalign;
-                    aligned_brk += correction;
-                }
-
-                /*
-                  If this isn't adjacent to existing space, then we will not
-                  be able to merge with old_top space, so must add to 2nd request.
-                */
-
-                correction += old_size;
-
-                /* Extend the end address to hit a page boundary */
-                end_misalign = (INTERNAL_SIZE_T)(brk + size + correction);
-                correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign;
-
-                assert(correction >= 0);
-                snd_brk = (char *)(MORECORE(correction));
-
-                if (snd_brk == (char *)(MORECORE_FAILURE)) {
-                    /*
-                      If can't allocate correction, try to at least find out current
-                      brk. It might be enough to proceed without failing.
-                    */
-                    correction = 0;
-                    snd_brk = (char *)(MORECORE(0));
-                } else if (snd_brk < brk) {
-                    /*
-                      If the second call gives noncontiguous space even though
-                      it says it won't, the only course of action is to ignore
-                      results of second call, and conservatively estimate where
-                      the first call left us. Also set noncontiguous, so this
-                      won't happen again, leaving at most one hole.
-
-                      Note that this check is intrinsically incomplete. Because
-                      MORECORE is allowed to give more space than we ask for,
-                      there is no reliable way to detect a noncontiguity
-                      producing a forward gap for the second call.
-                    */
-                    snd_brk = brk + size;
-                    correction = 0;
-                    set_noncontiguous(av);
-                }
-            }
-
-            /* handle non-contiguous cases */
-            else {
-                /* MORECORE/mmap must correctly align */
-                assert(aligned_OK(chunk2mem(brk)));
-
-                /* Find out current end of memory */
-                if (snd_brk == (char *)(MORECORE_FAILURE)) {
-                    snd_brk = (char *)(MORECORE(0));
-                    av->sbrked_mem += snd_brk - brk - size;
-                }
-            }
-
-            /* Adjust top based on results of second sbrk */
-            if (snd_brk != (char *)(MORECORE_FAILURE)) {
-                av->top = (mchunkptr)aligned_brk;
-                set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
-                av->sbrked_mem += correction;
-
-                /*
-                  If not the first time through, we either have a
-                  gap due to foreign sbrk or a non-contiguous region. Insert a
-                  double fencepost at old_top to prevent consolidation with space
-                  we don't own. These fenceposts are artificial chunks that are
-                  marked as inuse and are in any case too small to use. We need
-                  two to make sizes and alignments work out.
-                */
-
-                if (old_size != 0) {
-                    /*
-                      Shrink old_top to insert fenceposts, keeping size a
-                      multiple of MALLOC_ALIGNMENT. We know there is at least
-                      enough space in old_top to do this.
-                    */
-                    old_size = (old_size - 3 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
-                    set_head(old_top, old_size | PREV_INUSE);
-
-                    /*
-                      Note that the following assignments completely overwrite
-                      old_top when old_size was previously MINSIZE. This is
-                      intentional. We need the fencepost, even if old_top otherwise gets
-                      lost.
-                    */
-                    chunk_at_offset(old_top, old_size)->size =
-                        SIZE_SZ | PREV_INUSE;
-
-                    chunk_at_offset(old_top, old_size + SIZE_SZ)->size =
-                        SIZE_SZ | PREV_INUSE;
-
-                    /*
-                      If possible, release the rest, suppressing trimming.
-                    */
-                    if (old_size >= MINSIZE) {
-                        INTERNAL_SIZE_T tt = av->trim_threshold;
-                        av->trim_threshold = (INTERNAL_SIZE_T)(-1);
-                        fREe(chunk2mem(old_top));
-                        av->trim_threshold = tt;
-                    }
-                }
-            }
-        }
-
-        /* Update statistics */
-        sum = av->sbrked_mem;
-        if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem)) {
-            av->max_sbrked_mem = sum;
-        }
-
-        sum += av->mmapped_mem;
-        if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) {
-            av->max_total_mem = sum;
-        }
-
-        check_malloc_state();
-
-        /* finally, do the allocation */
-
-        p = av->top;
-        size = chunksize(p);
-
-        /* check that one of the above allocation paths succeeded */
-        if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
-            remainder_size = size - nb;
-            remainder = chunk_at_offset(p, nb);
-            av->top = remainder;
-            set_head(p, nb | PREV_INUSE);
-            set_head(remainder, remainder_size | PREV_INUSE);
-            check_malloced_chunk(p, nb);
-            return chunk2mem(p);
-        }
-    }
-
-    /* catch all failure paths */
-    MALLOC_FAILURE_ACTION;
-    return 0;
-}
-
-/*
-  sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back
-  to the system (via negative arguments to sbrk) if there is unused
-  memory at the `high' end of the malloc pool. It is called
-  automatically by free() when top space exceeds the trim
-  threshold. It is also called by the public malloc_trim routine. It
-  returns 1 if it actually released any memory, else 0.
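-
-  (Worked example of the computation below: with 4K pages, pad == 0,
-  and a 1MB top chunk,
-
-      extra = ((1048576 - 0 - 16 + 4095) / 4096 - 1) * 4096
-            = 255 * 4096
-
-  so 255 of the 256 pages are offered back to the system, and one page
-  plus the MINSIZE sliver is kept.)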
-*/
-
-#if __STD_C
-static int sYSTRIm(size_t pad, mstate av)
-#else
-static int sYSTRIm(pad, av)
-size_t pad;
-mstate av;
-#endif
-{
-    long top_size;     /* Amount of top-most memory */
-    long extra;        /* Amount to release */
-    long released;     /* Amount actually released */
-    char *current_brk; /* address returned by pre-check sbrk call */
-    char *new_brk;     /* address returned by post-check sbrk call */
-    size_t pagesz;
-
-    pagesz = av->pagesize;
-    top_size = chunksize(av->top);
-
-    /* Release in pagesize units, keeping at least one page */
-    extra = ((top_size - pad - MINSIZE + (pagesz - 1)) / pagesz - 1) * pagesz;
-
-    if (extra > 0) {
-
-        /*
-          Only proceed if end of memory is where we last set it.
-          This avoids problems if there were foreign sbrk calls.
-        */
-        current_brk = (char *)(MORECORE(0));
-        if (current_brk == (char *)(av->top) + top_size) {
-
-            /*
-              Attempt to release memory. We ignore MORECORE return value,
-              and instead call again to find out where new end of memory is.
-              This avoids problems if first call releases less than we asked,
-              or if failure somehow altered brk value. (We could still
-              encounter problems if it altered brk in some very bad way,
-              but the only thing we can do is adjust anyway, which will cause
-              some downstream failure.)
-            */
-
-            MORECORE(-extra);
-            new_brk = (char *)(MORECORE(0));
-
-            if (new_brk != (char *)MORECORE_FAILURE) {
-                released = (long)(current_brk - new_brk);
-
-                if (released != 0) {
-                    /* Success. Adjust top. */
-                    av->sbrked_mem -= released;
-                    set_head(av->top, (top_size - released) | PREV_INUSE);
-                    check_malloc_state();
-                    return 1;
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-/*
-  ------------------------------ malloc ------------------------------
-*/
-
-#if __STD_C
-Void_t *mALLOc(size_t bytes)
-#else
-Void_t *mALLOc(bytes)
-size_t bytes;
-#endif
-{
-    mstate av = get_malloc_state();
-
-    INTERNAL_SIZE_T nb; /* normalized request size */
-    unsigned int idx;   /* associated bin index */
-    mbinptr bin;        /* associated bin */
-    mfastbinptr *fb;    /* associated fastbin */
-
-    mchunkptr victim;     /* inspected/selected chunk */
-    INTERNAL_SIZE_T size; /* its size */
-    int victim_index;     /* its bin index */
-
-    mchunkptr remainder;         /* remainder from a split */
-    CHUNK_SIZE_T remainder_size; /* its size */
-
-    unsigned int block; /* bit map traverser */
-    unsigned int bit;   /* bit map traverser */
-    unsigned int map;   /* current word of binmap */
-
-    mchunkptr fwd; /* misc temp for linking */
-    mchunkptr bck; /* misc temp for linking */
-
-    /*
-      Convert request size to internal form by adding SIZE_SZ bytes
-      overhead plus possibly more to obtain necessary alignment and/or
-      to obtain a size of at least MINSIZE, the smallest allocatable
-      size. Also, checked_request2size traps (returning 0) request sizes
-      that are so large that they wrap around zero when padded and
-      aligned.
-    */
-
-    checked_request2size(bytes, nb);
-
-    /*
-      Bypass search if no frees yet
-    */
-    if (!have_anychunks(av)) {
-        if (av->max_fast == 0) { /* initialization check */
-            malloc_consolidate(av);
-        }
-        goto use_top;
-    }
-
-    /*
-      If the size qualifies as a fastbin, first check corresponding bin.
-    */
-
-    if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) {
-        fb = &(av->fastbins[(fastbin_index(nb))]);
-        if ((victim = *fb) != 0) {
-            *fb = victim->fd;
-            check_remalloced_chunk(victim, nb);
-            return chunk2mem(victim);
-        }
-    }
-
-    /*
-      If a small request, check regular bin. Since these "smallbins"
-      hold one size each, no searching within bins is necessary.
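-
-      (Worked example, assuming 4-byte size_t and 8-byte alignment: a
-      request for 20 bytes normalizes to nb = request2size(20) = 24, and
-      smallbin_index(24) = 24 >> 3 = 3, so exactly one bin -- bin 3 --
-      needs to be examined.)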
-      (For a large request, we need to wait until unsorted chunks are
-      processed to find best fit. But for small ones, fits are exact
-      anyway, so we can check now, which is faster.)
-    */
-
-    if (in_smallbin_range(nb)) {
-        idx = smallbin_index(nb);
-        bin = bin_at(av, idx);
-
-        if ((victim = last(bin)) != bin) {
-            bck = victim->bk;
-            set_inuse_bit_at_offset(victim, nb);
-            bin->bk = bck;
-            bck->fd = bin;
-
-            check_malloced_chunk(victim, nb);
-            return chunk2mem(victim);
-        }
-    }
-
-    /*
-      If this is a large request, consolidate fastbins before continuing.
-      While it might look excessive to kill all fastbins before
-      even seeing if there is space available, this avoids
-      fragmentation problems normally associated with fastbins.
-      Also, in practice, programs tend to have runs of either small or
-      large requests, but less often mixtures, so consolidation is not
-      invoked all that often in most programs. And the programs that
-      it is called frequently in otherwise tend to fragment.
-    */
-
-    else {
-        idx = largebin_index(nb);
-        if (have_fastchunks(av)) {
-            malloc_consolidate(av);
-        }
-    }
-
-    /*
-      Process recently freed or remaindered chunks, taking one only if
-      it is an exact fit, or, if this is a small request, the chunk is a
-      remainder from the most recent non-exact fit. Place other traversed
-      chunks in bins. Note that this step is the only place in any routine
-      where chunks are placed in bins.
-    */
-
-    while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
-        bck = victim->bk;
-        size = chunksize(victim);
-
-        /*
-          If a small request, try to use last remainder if it is the
-          only chunk in unsorted bin. This helps promote locality for
-          runs of consecutive small requests. This is the only
-          exception to best-fit, and applies only when there is
-          no exact fit for a small chunk.
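-
-          (For instance, a run of identical small requests carving
-          pieces off one large remainder keeps hitting this case: each
-          split returns a chunk adjacent to the previous one and leaves
-          a smaller last_remainder, so consecutive allocations end up
-          contiguous in memory.)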
- */ - - if (in_smallbin_range(nb) && - bck == unsorted_chunks(av) && - victim == av->last_remainder && - (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { - - /* split and reattach remainder */ - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - av->last_remainder = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* remove from unsorted list */ - unsorted_chunks(av)->bk = bck; - bck->fd = unsorted_chunks(av); - - /* Take now instead of binning if exact fit */ - - if (size == nb) { - set_inuse_bit_at_offset(victim, size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* place chunk in bin */ - - if (in_smallbin_range(size)) { - victim_index = smallbin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - } else { - victim_index = largebin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - - if (fwd != bck) { - /* if smaller than smallest, place first */ - if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) { - fwd = bck; - bck = bck->bk; - } else if ((CHUNK_SIZE_T)(size) >= - (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { - - /* maintain large bins in sorted order */ - size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ - while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) { - fwd = fwd->fd; - } - bck = fwd->bk; - } - } - } - - mark_bin(av, victim_index); - victim->bk = bck; - victim->fd = fwd; - fwd->bk = victim; - bck->fd = victim; - } - - /* - If a large request, scan through the chunks of current bin to - find one that fits. (This will be the smallest that fits unless - FIRST_SORTED_BIN_SIZE has been changed from default.) This is - the only step where an unbounded number of chunks might be - scanned without doing anything useful with them. However the - lists tend to be short. - */ - - if (!in_smallbin_range(nb)) { - bin = bin_at(av, idx); - - for (victim = last(bin); victim != bin; victim = victim->bk) { - size = chunksize(victim); - - if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) { - remainder_size = size - nb; - unlink(victim, bck, fwd); - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - } - } - } - - /* - Search for a chunk by scanning bins, starting with next largest - bin. This search is strictly by best-fit; i.e., the smallest - (with ties going to approximately the least recently used) chunk - that fits is selected. - - The bitmap avoids needing to check that most blocks are nonempty. - */ - - ++idx; - bin = bin_at(av, idx); - block = idx2block(idx); - map = av->binmap[block]; - bit = idx2bit(idx); - - for (;;) { - - /* Skip rest of block if there are no more set bits in this block. 
*/ - if (bit > map || bit == 0) { - do { - if (++block >= BINMAPSIZE) { /* out of bins */ - goto use_top; - } - } while ((map = av->binmap[block]) == 0); - - bin = bin_at(av, (block << BINMAPSHIFT)); - bit = 1; - } - - /* Advance to bin with set bit. There must be one. */ - while ((bit & map) == 0) { - bin = next_bin(bin); - bit <<= 1; - assert(bit != 0); - } - - /* Inspect the bin. It is likely to be non-empty */ - victim = last(bin); - - /* If a false alarm (empty bin), clear the bit. */ - if (victim == bin) { - av->binmap[block] = map &= ~bit; /* Write through */ - bin = next_bin(bin); - bit <<= 1; - } - - else { - size = chunksize(victim); - - /* We know the first chunk in this bin is big enough to use. */ - assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)); - - remainder_size = size - nb; - - /* unlink */ - bck = victim->bk; - bin->bk = bck; - bck->fd = bin; - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - /* advertise as last remainder */ - if (in_smallbin_range(nb)) { - av->last_remainder = remainder; - } - - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - } - } - -use_top: - /* - If large enough, split off the chunk bordering the end of memory - (held in av->top). Note that this is in accord with the best-fit - search rule. In effect, av->top is treated as larger (and thus - less well fitting) than any other available chunk since it can - be extended to be as large as necessary (up to system - limitations). - - We require that av->top always exists (i.e., has size >= - MINSIZE) after initialization, so if it would otherwise be - exhausted by current request, it is replenished. (The main - reason for ensuring it exists is that we may need MINSIZE space - to put in fenceposts in sysmalloc.) - */ - - victim = av->top; - size = chunksize(victim); - - if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - av->top = remainder; - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* - If no space in top, relay to handle system-dependent cases - */ - return sYSMALLOc(nb, av); -} - -/* - ------------------------------ free ------------------------------ -*/ - -#if __STD_C -void fREe(Void_t *mem) -#else -void fREe(mem) Void_t *mem; -#endif -{ - mstate av = get_malloc_state(); - - mchunkptr p; /* chunk corresponding to mem */ - INTERNAL_SIZE_T size; /* its size */ - mfastbinptr *fb; /* associated fastbin */ - mchunkptr nextchunk; /* next contiguous chunk */ - INTERNAL_SIZE_T nextsize; /* its size */ - int nextinuse; /* true if nextchunk is used */ - INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - /* free(0) has no effect */ - if (mem != 0) { - p = mem2chunk(mem); - size = chunksize(p); - - check_inuse_chunk(p); - - /* - If eligible, place chunk on a fastbin so it can be found - and used quickly in malloc.
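The binmap walk in the scan above skips a whole word of bins at a time (idx2block/idx2bit), which is what keeps searching mostly-empty bins cheap. A small self-contained model of the same scan, assuming 32 bins per map word; the names here are illustrative, not dlmalloc's macros:

#include <assert.h>

#define NBINS_DEMO 128
#define BITS_PER_WORD 32

/* One bit per bin: set means "this bin may be non-empty". */
static unsigned binmap_demo[NBINS_DEMO / BITS_PER_WORD];

static void mark_bin_demo(int idx) {
    binmap_demo[idx / BITS_PER_WORD] |= 1u << (idx % BITS_PER_WORD);
}

/* Find the first marked bin at or after idx, or -1 if none remain. */
static int next_marked_bin(int idx) {
    int block = idx / BITS_PER_WORD;
    unsigned bit = 1u << (idx % BITS_PER_WORD);
    unsigned map = binmap_demo[block];

    /* Skip whole words with no set bits at or above `bit`. */
    while (bit > map || bit == 0) {
        if (++block >= (int)(NBINS_DEMO / BITS_PER_WORD)) {
            return -1; /* out of bins */
        }
        map = binmap_demo[block];
        bit = 1;
        idx = block * BITS_PER_WORD;
    }
    /* A set bit at or above `bit` now exists in this word; walk to it. */
    while ((bit & map) == 0) {
        bit <<= 1;
        ++idx;
    }
    return idx;
}

int main(void) {
    mark_bin_demo(70);
    assert(next_marked_bin(3) == 70);  /* two empty words skipped in O(1) each */
    assert(next_marked_bin(70) == 70);
    assert(next_marked_bin(71) == -1);
    return 0;
}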
- */ - - if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast) - -#if TRIM_FASTBINS - /* - If TRIM_FASTBINS set, don't place chunks - bordering top into fastbins - */ - && (chunk_at_offset(p, size) != av->top) -#endif - ) { - - set_fastchunks(av); - fb = &(av->fastbins[fastbin_index(size)]); - p->fd = *fb; - *fb = p; - } - - /* - Consolidate other non-mmapped chunks as they arrive. - */ - - else if (!chunk_is_mmapped(p)) { - set_anychunks(av); - - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - - /* consolidate backward */ - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long)prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - /* get and clear inuse bit */ - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - set_head(nextchunk, nextsize); - - /* consolidate forward */ - if (!nextinuse) { - unlink(nextchunk, bck, fwd); - size += nextsize; - } - - /* - Place the chunk in unsorted chunk list. Chunks are - not placed into regular bins until after they have - been given one chance to be used in malloc. - */ - - bck = unsorted_chunks(av); - fwd = bck->fd; - p->bk = bck; - p->fd = fwd; - bck->fd = p; - fwd->bk = p; - - set_head(p, size | PREV_INUSE); - set_foot(p, size); - - check_free_chunk(p); - } - - /* - If the chunk borders the current high end of memory, - consolidate into top - */ - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - check_chunk(p); - } - - /* - If freeing a large space, consolidate possibly-surrounding - chunks. Then, if the total unused topmost memory exceeds trim - threshold, ask malloc_trim to reduce top. - - Unless max_fast is 0, we don't know if there are fastbins - bordering top, so we cannot tell for sure whether threshold - has been reached unless fastbins are consolidated. But we - don't want to consolidate on each free. As a compromise, - consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD - is reached. - */ - - if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { - if (have_fastchunks(av)) { - malloc_consolidate(av); - } - -#ifndef MORECORE_CANNOT_TRIM - if ((CHUNK_SIZE_T)(chunksize(av->top)) >= - (CHUNK_SIZE_T)(av->trim_threshold)) { - sYSTRIm(av->top_pad, av); - } -#endif - } - } - /* - If the chunk was allocated via mmap, release via munmap() - Note that if HAVE_MMAP is false but chunk_is_mmapped is - true, then user must have overwritten memory. There's nothing - we can do to catch this error unless DL_DEBUG is set, in which case - check_inuse_chunk (above) will have triggered error. - */ - - else { -#if HAVE_MMAP - INTERNAL_SIZE_T offset = p->prev_size; - av->n_mmaps--; - av->mmapped_mem -= (size + offset); - munmap((char *)p - offset, size + offset); -#endif - } - } -} - -/* - ------------------------- malloc_consolidate ------------------------- - - malloc_consolidate is a specialized version of free() that tears - down chunks held in fastbins. Free itself cannot be used for this - purpose since, among other things, it might place chunks back onto - fastbins. So, instead, we need to use a minor variant of the same - code. - - Also, because this routine needs to be called the first time through - malloc anyway, it turns out to be the perfect place to trigger - initialization code. 
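The consolidation in free() above depends on boundary tags: chunk sizes are multiples of the alignment, so the low bit of the size word is free to record whether the previous contiguous chunk is in use (PREV_INUSE), and a free chunk stores its size again as a footer (set_foot) in the next chunk's prev_size field so coalescing can walk backwards. A minimal sketch of that encoding, with stand-in names and a toy arena rather than dlmalloc's real macros:

#include <assert.h>
#include <stddef.h>

#define PREV_INUSE_DEMO ((size_t)0x1)

/* Stand-in chunk header. prev_size is meaningful only while the
   previous contiguous chunk is free: it is that chunk's footer. */
struct hdr {
    size_t prev_size;
    size_t size; /* real size with flag bits packed into the low bits */
};

static size_t chunksize_demo(const struct hdr *p) {
    return p->size & ~PREV_INUSE_DEMO;
}

static int prev_inuse_demo(const struct hdr *p) {
    return (p->size & PREV_INUSE_DEMO) != 0;
}

/* The next contiguous chunk starts exactly chunksize() bytes ahead. */
static struct hdr *next_chunk_demo(struct hdr *p) {
    return (struct hdr *)((char *)p + chunksize_demo(p));
}

/* Reaching the previous chunk is only possible through its footer. */
static struct hdr *prev_chunk_demo(struct hdr *p) {
    return (struct hdr *)((char *)p - p->prev_size);
}

int main(void) {
    static size_t arena[12]; /* toy heap, suitably aligned */
    struct hdr *a = (struct hdr *)arena;
    a->size = 32 | PREV_INUSE_DEMO; /* 32-byte chunk; its own predecessor is in use */
    struct hdr *b = next_chunk_demo(a);
    b->prev_size = 32; /* footer that set_foot() would write when a is freed */
    b->size = 48;      /* low bit clear: records that a is free */
    assert(chunksize_demo(a) == 32);
    assert(!prev_inuse_demo(b));     /* so freeing b would coalesce backward */
    assert(prev_chunk_demo(b) == a);
    return 0;
}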
-*/ - -#if __STD_C -static void malloc_consolidate(mstate av) -#else -static void malloc_consolidate(av) mstate av; -#endif -{ - mfastbinptr *fb; /* current fastbin being consolidated */ - mfastbinptr *maxfb; /* last fastbin (for loop control) */ - mchunkptr p; /* current chunk being consolidated */ - mchunkptr nextp; /* next chunk to consolidate */ - mchunkptr unsorted_bin; /* bin header */ - mchunkptr first_unsorted; /* chunk to link to */ - - /* These have same use as in free() */ - mchunkptr nextchunk; - INTERNAL_SIZE_T size; - INTERNAL_SIZE_T nextsize; - INTERNAL_SIZE_T prevsize; - int nextinuse; - mchunkptr bck; - mchunkptr fwd; - - /* - If max_fast is 0, we know that av hasn't - yet been initialized, in which case do so below - */ - - if (av->max_fast != 0) { - clear_fastchunks(av); - - unsorted_bin = unsorted_chunks(av); - - /* - Remove each chunk from fast bin and consolidate it, placing it - then in unsorted bin. Among other reasons for doing this, - placing in unsorted bin avoids needing to calculate actual bins - until malloc is sure that chunks aren't immediately going to be - reused anyway. - */ - - maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); - fb = &(av->fastbins[0]); - do { - if ((p = *fb) != 0) { - *fb = 0; - - do { - check_inuse_chunk(p); - nextp = p->fd; - - /* Slightly streamlined version of consolidation code in free() */ - size = p->size & ~PREV_INUSE; - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long)prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - set_head(nextchunk, nextsize); - - if (!nextinuse) { - size += nextsize; - unlink(nextchunk, bck, fwd); - } - - first_unsorted = unsorted_bin->fd; - unsorted_bin->fd = p; - first_unsorted->bk = p; - - set_head(p, size | PREV_INUSE); - p->bk = unsorted_bin; - p->fd = first_unsorted; - set_foot(p, size); - } - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - } - - } while ((p = nextp) != 0); - } - } while (fb++ != maxfb); - } else { - malloc_init_state(av); - check_malloc_state(); - } -} - -/* - ------------------------------ realloc ------------------------------ -*/ - -#if __STD_C -Void_t *rEALLOc(Void_t *oldmem, size_t bytes) -#else -Void_t *rEALLOc(oldmem, bytes) -Void_t *oldmem; -size_t bytes; -#endif -{ - mstate av = get_malloc_state(); - - INTERNAL_SIZE_T nb; /* padded request size */ - - mchunkptr oldp; /* chunk corresponding to oldmem */ - INTERNAL_SIZE_T oldsize; /* its size */ - - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - Void_t *newmem; /* corresponding user mem */ - - mchunkptr next; /* next contiguous chunk after oldp */ - - mchunkptr remainder; /* extra space at end of newp */ - CHUNK_SIZE_T remainder_size; /* its size */ - - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - CHUNK_SIZE_T copysize; /* bytes to copy */ - unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ - INTERNAL_SIZE_T *s; /* copy source */ - INTERNAL_SIZE_T *d; /* copy destination */ - -#ifdef REALLOC_ZERO_BYTES_FREES - if (bytes == 0) { - fREe(oldmem); - return 0; - } -#endif - - /* realloc of null is supposed to be same as malloc */ - if (oldmem == 0) { - return mALLOc(bytes); - } - - checked_request2size(bytes, nb); - - oldp = mem2chunk(oldmem); - oldsize = chunksize(oldp); - - check_inuse_chunk(oldp); - 
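checked_request2size, called here and in malloc above, pads the request with header overhead and alignment and must trap requests so large that the padding would wrap past zero. A minimal sketch of such a guard, using illustrative constants rather than dlmalloc's real SIZE_SZ/MINSIZE/MALLOC_ALIGN_MASK macros:

#include <assert.h>
#include <stddef.h>

#define SIZE_SZ_DEMO sizeof(size_t)            /* per-chunk bookkeeping  */
#define ALIGN_MASK_DEMO (2 * SIZE_SZ_DEMO - 1) /* double-word alignment  */
#define MINSIZE_DEMO (4 * SIZE_SZ_DEMO)        /* smallest usable chunk  */

/* Pad a user request to a usable chunk size; 0 signals overflow. */
static size_t request2size_demo(size_t bytes) {
    if (bytes > (size_t)-1 - 2 * MINSIZE_DEMO) {
        return 0; /* padding would wrap around zero: trap it */
    }
    size_t nb = (bytes + SIZE_SZ_DEMO + ALIGN_MASK_DEMO) & ~ALIGN_MASK_DEMO;
    return nb < MINSIZE_DEMO ? MINSIZE_DEMO : nb;
}

int main(void) {
    assert(request2size_demo(1) == MINSIZE_DEMO);         /* minimum chunk  */
    assert(request2size_demo(100) % (ALIGN_MASK_DEMO + 1) == 0);
    assert(request2size_demo((size_t)-1) == 0);           /* wrap trapped   */
    return 0;
}

Returning 0 (rather than wrapping) is what lets callers such as rEALLOc fail cleanly on absurd sizes instead of handing back a tiny chunk.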
- if (!chunk_is_mmapped(oldp)) { - - if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) { - /* already big enough; split below */ - newp = oldp; - newsize = oldsize; - } - - else { - next = chunk_at_offset(oldp, oldsize); - - /* Try to expand forward into top */ - if (next == av->top && - (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= - (CHUNK_SIZE_T)(nb + MINSIZE)) { - set_head_size(oldp, nb); - av->top = chunk_at_offset(oldp, nb); - set_head(av->top, (newsize - nb) | PREV_INUSE); - return chunk2mem(oldp); - } - - /* Try to expand forward into next chunk; split off remainder below */ - else if (next != av->top && - !inuse(next) && - (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= - (CHUNK_SIZE_T)(nb)) { - newp = oldp; - unlink(next, bck, fwd); - } - - /* allocate, copy, free */ - else { - newmem = mALLOc(nb - MALLOC_ALIGN_MASK); - if (newmem == 0) { - return 0; /* propagate failure */ - } - - newp = mem2chunk(newmem); - newsize = chunksize(newp); - - /* - Avoid copy if newp is next chunk after oldp. - */ - if (newp == next) { - newsize += oldsize; - newp = oldp; - } else { - /* - Unroll copy of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - copysize = oldsize - SIZE_SZ; - s = (INTERNAL_SIZE_T *)(oldmem); - d = (INTERNAL_SIZE_T *)(newmem); - ncopies = copysize / sizeof(INTERNAL_SIZE_T); - assert(ncopies >= 3); - - if (ncopies > 9) { - MALLOC_COPY(d, s, copysize); - } - - else { - *(d + 0) = *(s + 0); - *(d + 1) = *(s + 1); - *(d + 2) = *(s + 2); - if (ncopies > 4) { - *(d + 3) = *(s + 3); - *(d + 4) = *(s + 4); - if (ncopies > 6) { - *(d + 5) = *(s + 5); - *(d + 6) = *(s + 6); - if (ncopies > 8) { - *(d + 7) = *(s + 7); - *(d + 8) = *(s + 8); - } - } - } - } - - fREe(oldmem); - check_inuse_chunk(newp); - return chunk2mem(newp); - } - } - } - - /* If possible, free extra space in old or extended chunk */ - - assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb)); - - remainder_size = newsize - nb; - - if (remainder_size < MINSIZE) { /* not enough extra to split off */ - set_head_size(newp, newsize); - set_inuse_bit_at_offset(newp, newsize); - } else { /* split remainder */ - remainder = chunk_at_offset(newp, nb); - set_head_size(newp, nb); - set_head(remainder, remainder_size | PREV_INUSE); - /* Mark remainder as inuse so free() won't complain */ - set_inuse_bit_at_offset(remainder, remainder_size); - fREe(chunk2mem(remainder)); - } - - check_inuse_chunk(newp); - return chunk2mem(newp); - } - - /* - Handle mmap cases - */ - - else { -#if HAVE_MMAP - -#if HAVE_MREMAP - INTERNAL_SIZE_T offset = oldp->prev_size; - size_t pagemask = av->pagesize - 1; - char *cp; - CHUNK_SIZE_T sum; - - /* Note the extra SIZE_SZ overhead */ - newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; - - /* don't need to remap if still within same page */ - if (oldsize == newsize - offset) { - return oldmem; - } - - cp = (char *)mremap((char *)oldp - offset, oldsize + offset, newsize, 1); - - if (cp != (char *)MORECORE_FAILURE) { - - newp = (mchunkptr)(cp + offset); - set_head(newp, (newsize - offset) | IS_MMAPPED); - - assert(aligned_OK(chunk2mem(newp))); - assert((newp->prev_size == offset)); - - /* update statistics */ - sum = av->mmapped_mem += newsize - oldsize; - if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) { - av->max_mmapped_mem = sum; - } - sum += av->sbrked_mem; - if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { - av->max_total_mem = sum; - } - - return chunk2mem(newp); - } -#endif - - /* Note the extra 
SIZE_SZ overhead. */ - if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) { - newmem = oldmem; /* do nothing */ - } else { - /* Must alloc, copy, free. */ - newmem = mALLOc(nb - MALLOC_ALIGN_MASK); - if (newmem != 0) { - MALLOC_COPY(newmem, oldmem, oldsize - 2 * SIZE_SZ); - fREe(oldmem); - } - } - return newmem; - -#else - /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ - check_malloc_state(); - MALLOC_FAILURE_ACTION; - return 0; -#endif - } -} - -/* - ------------------------------ memalign ------------------------------ -*/ - -#if __STD_C -Void_t *mEMALIGn(size_t alignment, size_t bytes) -#else -Void_t *mEMALIGn(alignment, bytes) -size_t alignment; -size_t bytes; -#endif -{ - INTERNAL_SIZE_T nb; /* padded request size */ - char *m; /* memory returned by malloc call */ - mchunkptr p; /* corresponding chunk */ - char *brk; /* alignment point within p */ - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ - mchunkptr remainder; /* spare room at end to split off */ - CHUNK_SIZE_T remainder_size; /* its size */ - INTERNAL_SIZE_T size; - - /* If need less alignment than we give anyway, just relay to malloc */ - - if (alignment <= MALLOC_ALIGNMENT) { - return mALLOc(bytes); - } - - /* Otherwise, ensure that it is at least a minimum chunk size */ - - if (alignment < MINSIZE) { - alignment = MINSIZE; - } - - /* Make sure alignment is power of 2 (in case MINSIZE is not). */ - if ((alignment & (alignment - 1)) != 0) { - size_t a = MALLOC_ALIGNMENT * 2; - while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) { - a <<= 1; - } - alignment = a; - } - - checked_request2size(bytes, nb); - - /* - Strategy: find a spot within that chunk that meets the alignment - request, and then possibly free the leading and trailing space. - */ - - /* Call malloc with worst case padding to hit alignment. */ - - m = (char *)(mALLOc(nb + alignment + MINSIZE)); - - if (m == 0) { - return 0; /* propagate failure */ - } - - p = mem2chunk(m); - - if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */ - - /* - Find an aligned spot inside chunk. Since we need to give back - leading space in a chunk of at least MINSIZE, if the first - calculation places us at a spot with less than MINSIZE leader, - we can move to the next aligned spot -- we've allocated enough - total room so that this is always possible. 
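The alignment strategy being described works because alignment has already been forced up to a power of two: over-allocate by alignment + MINSIZE, then round the returned address up with a mask. A short standalone sketch of both steps, with stand-in helpers and uintptr_t arithmetic assumed:

#include <assert.h>
#include <stdint.h>

/* Round a requested alignment up to a power of two, starting from a
   floor, mirroring the doubling loop in mEMALIGn above. */
static uintptr_t pow2_at_least(uintptr_t want, uintptr_t start) {
    uintptr_t a = start;
    while (a < want) {
        a <<= 1;
    }
    return a;
}

/* Round an address up to a power-of-two boundary. */
static uintptr_t align_up(uintptr_t addr, uintptr_t alignment) {
    return (addr + alignment - 1) & ~(alignment - 1);
}

int main(void) {
    assert(pow2_at_least(48, 16) == 64); /* 48 is not a power of two */
    assert(align_up(1000, 64) == 1024);
    assert(align_up(1024, 64) == 1024);  /* already aligned: unchanged */
    return 0;
}

The mask trick is the same computation the code below performs with -((signed long)alignment), which equals ~(alignment - 1) for powers of two.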
- */ - - brk = (char *)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) & - -((signed long)alignment))); - if ((CHUNK_SIZE_T)(brk - (char *)(p)) < MINSIZE) { - brk += alignment; - } - - newp = (mchunkptr)brk; - leadsize = brk - (char *)(p); - newsize = chunksize(p) - leadsize; - - /* For mmapped chunks, just adjust offset */ - if (chunk_is_mmapped(p)) { - newp->prev_size = p->prev_size + leadsize; - set_head(newp, newsize | IS_MMAPPED); - return chunk2mem(newp); - } - - /* Otherwise, give back leader, use the rest */ - set_head(newp, newsize | PREV_INUSE); - set_inuse_bit_at_offset(newp, newsize); - set_head_size(p, leadsize); - fREe(chunk2mem(p)); - p = newp; - - assert(newsize >= nb && - (((PTR_UINT)(chunk2mem(p))) % alignment) == 0); - } - - /* Also give back spare room at the end */ - if (!chunk_is_mmapped(p)) { - size = chunksize(p); - if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(p, nb); - set_head(remainder, remainder_size | PREV_INUSE); - set_head_size(p, nb); - fREe(chunk2mem(remainder)); - } - } - - check_inuse_chunk(p); - return chunk2mem(p); -} - -/* - ------------------------------ calloc ------------------------------ -*/ - -#if __STD_C -Void_t *cALLOc(size_t n_elements, size_t elem_size) -#else -Void_t *cALLOc(n_elements, elem_size) -size_t n_elements; -size_t elem_size; -#endif -{ - mchunkptr p; - CHUNK_SIZE_T clearsize; - CHUNK_SIZE_T nclears; - INTERNAL_SIZE_T *d; - - Void_t *mem = mALLOc(n_elements * elem_size); - - if (mem != 0) { - p = mem2chunk(mem); - - if (!chunk_is_mmapped(p)) { - /* - Unroll clear of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - d = (INTERNAL_SIZE_T *)mem; - clearsize = chunksize(p) - SIZE_SZ; - nclears = clearsize / sizeof(INTERNAL_SIZE_T); - assert(nclears >= 3); - - if (nclears > 9) { - MALLOC_ZERO(d, clearsize); - } - - else { - *(d + 0) = 0; - *(d + 1) = 0; - *(d + 2) = 0; - if (nclears > 4) { - *(d + 3) = 0; - *(d + 4) = 0; - if (nclears > 6) { - *(d + 5) = 0; - *(d + 6) = 0; - if (nclears > 8) { - *(d + 7) = 0; - *(d + 8) = 0; - } - } - } - } - } -#if !MMAP_CLEARS - else { - d = (INTERNAL_SIZE_T *)mem; - /* - Note the additional SIZE_SZ - */ - clearsize = chunksize(p) - 2 * SIZE_SZ; - MALLOC_ZERO(d, clearsize); - } -#endif - } - return mem; -} - -/* - ------------------------------ cfree ------------------------------ -*/ - -#if __STD_C -void cFREe(Void_t *mem) -#else -void cFREe(mem) Void_t *mem; -#endif -{ - fREe(mem); -} - -/* - ------------------------- independent_calloc ------------------------- -*/ - -#if __STD_C -Void_t **iCALLOc(size_t n_elements, size_t elem_size, Void_t *chunks[]) -#else -Void_t **iCALLOc(n_elements, elem_size, chunks) -size_t n_elements; -size_t elem_size; -Void_t *chunks[]; -#endif -{ - size_t sz = elem_size; /* serves as 1-element array */ - /* opts arg of 3 means all elements are same size, and should be cleared */ - return iALLOc(n_elements, &sz, 3, chunks); -} - -/* - ------------------------- independent_comalloc ------------------------- -*/ - -#if __STD_C -Void_t **iCOMALLOc(size_t n_elements, size_t sizes[], Void_t *chunks[]) -#else -Void_t **iCOMALLOc(n_elements, sizes, chunks) -size_t n_elements; -size_t sizes[]; -Void_t *chunks[]; -#endif -{ - return iALLOc(n_elements, sizes, 0, chunks); -} - -/* - ------------------------------ ialloc ------------------------------ - ialloc provides common support for independent_X routines, handling all of 
- the combinations that can result. - - The opts arg has: - bit 0 set if all elements are same size (using sizes[0]) - bit 1 set if elements should be zeroed -*/ - -#if __STD_C -static Void_t **iALLOc(size_t n_elements, - size_t *sizes, - int opts, - Void_t *chunks[]) -#else -static Void_t **iALLOc(n_elements, sizes, opts, chunks) -size_t n_elements; -size_t *sizes; -int opts; -Void_t *chunks[]; -#endif -{ - mstate av = get_malloc_state(); - INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ - INTERNAL_SIZE_T contents_size; /* total size of elements */ - INTERNAL_SIZE_T array_size; /* request size of pointer array */ - Void_t *mem; /* malloced aggregate space */ - mchunkptr p; /* corresponding chunk */ - INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ - Void_t **marray; /* either "chunks" or malloced ptr array */ - mchunkptr array_chunk; /* chunk for malloced ptr array */ - int mmx; /* to disable mmap */ - INTERNAL_SIZE_T size; - size_t i; - - /* Ensure initialization */ - if (av->max_fast == 0) { - malloc_consolidate(av); - } - - /* compute array length, if needed */ - if (chunks != 0) { - if (n_elements == 0) { - return chunks; /* nothing to do */ - } - marray = chunks; - array_size = 0; - } else { - /* if empty req, must still return chunk representing empty array */ - if (n_elements == 0) { - return (Void_t **)mALLOc(0); - } - marray = 0; - array_size = request2size(n_elements * (sizeof(Void_t *))); - } - - /* compute total element size */ - if (opts & 0x1) { /* all-same-size */ - element_size = request2size(*sizes); - contents_size = n_elements * element_size; - } else { /* add up all the sizes */ - element_size = 0; - contents_size = 0; - for (i = 0; i != n_elements; ++i) { - contents_size += request2size(sizes[i]); - } - } - - /* subtract out alignment bytes from total to minimize overallocation */ - size = contents_size + array_size - MALLOC_ALIGN_MASK; - - /* - Allocate the aggregate chunk. - But first disable mmap so malloc won't use it, since - we would not be able to later free/realloc space internal - to a segregated mmap region. 
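For context, a hedged usage sketch of the wrappers above: independent_comalloc carves one underlying allocation into several independently freeable pieces. This assumes a build that exposes the unprefixed public name (USE_DL_PREFIX unset); the declaration below stands in for the real header:

#include <stdio.h>
#include <stdlib.h>

/* Assumed declaration; supplied by this header in an actual build. */
void **independent_comalloc(size_t n_elements, size_t sizes[], void *chunks[]);

struct header { int id; };
struct body { double payload; };

int main(void) {
    size_t sizes[2] = {sizeof(struct header), sizeof(struct body)};
    /* One aggregate allocation, split into two chunks plus the array. */
    void **parts = independent_comalloc(2, sizes, NULL);
    if (parts == NULL) {
        return 1;
    }
    struct header *h = parts[0];
    struct body *b = parts[1];
    h->id = 1;
    b->payload = 2.0;
    printf("%d %f\n", h->id, b->payload);
    /* Each element, and the pointer array itself, is separately freeable. */
    free(parts[0]);
    free(parts[1]);
    free((void *)parts);
    return 0;
}

Disabling mmap around the aggregate mALLOc call, as the code below does, is what guarantees these interior pieces are ordinary sbrk-backed chunks that free() can handle individually.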
- */ - mmx = av->n_mmaps_max; /* disable mmap */ - av->n_mmaps_max = 0; - mem = mALLOc(size); - av->n_mmaps_max = mmx; /* reset mmap */ - if (mem == 0) { - return 0; - } - - p = mem2chunk(mem); - assert(!chunk_is_mmapped(p)); - remainder_size = chunksize(p); - - if (opts & 0x2) { /* optionally clear the elements */ - MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); - } - - /* If not provided, allocate the pointer array as final part of chunk */ - if (marray == 0) { - array_chunk = chunk_at_offset(p, contents_size); - marray = (Void_t **)(chunk2mem(array_chunk)); - set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE); - remainder_size = contents_size; - } - - /* split out elements */ - for (i = 0;; ++i) { - marray[i] = chunk2mem(p); - if (i != n_elements - 1) { - if (element_size != 0) { - size = element_size; - } else { - size = request2size(sizes[i]); - } - remainder_size -= size; - set_head(p, size | PREV_INUSE); - p = chunk_at_offset(p, size); - } else { /* the final element absorbs any overallocation slop */ - set_head(p, remainder_size | PREV_INUSE); - break; - } - } - -#if DL_DEBUG - if (marray != chunks) { - /* final element must have exactly exhausted chunk */ - if (element_size != 0) { - assert(remainder_size == element_size); - } else { - assert(remainder_size == request2size(sizes[i])); - } - check_inuse_chunk(mem2chunk(marray)); - } - - for (i = 0; i != n_elements; ++i) { - check_inuse_chunk(mem2chunk(marray[i])); - } -#endif - - return marray; -} - -/* - ------------------------------ valloc ------------------------------ -*/ - -#if __STD_C -Void_t *vALLOc(size_t bytes) -#else -Void_t *vALLOc(bytes) -size_t bytes; -#endif -{ - /* Ensure initialization */ - mstate av = get_malloc_state(); - if (av->max_fast == 0) { - malloc_consolidate(av); - } - return mEMALIGn(av->pagesize, bytes); -} - -/* - ------------------------------ pvalloc ------------------------------ -*/ - -#if __STD_C -Void_t *pVALLOc(size_t bytes) -#else -Void_t *pVALLOc(bytes) -size_t bytes; -#endif -{ - mstate av = get_malloc_state(); - size_t pagesz; - - /* Ensure initialization */ - if (av->max_fast == 0) { - malloc_consolidate(av); - } - pagesz = av->pagesize; - return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); -} - -/* - ------------------------------ malloc_trim ------------------------------ -*/ - -#if __STD_C -int mTRIm(size_t pad) -#else -int mTRIm(pad) -size_t pad; -#endif -{ - mstate av = get_malloc_state(); - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - -#ifndef MORECORE_CANNOT_TRIM - return sYSTRIm(pad, av); -#else - return 0; -#endif -} - -/* - ------------------------- malloc_usable_size ------------------------- -*/ - -#if __STD_C -size_t mUSABLe(Void_t *mem) -#else -size_t mUSABLe(mem) -Void_t *mem; -#endif -{ - mchunkptr p; - if (mem != 0) { - p = mem2chunk(mem); - if (chunk_is_mmapped(p)) { - return chunksize(p) - 2 * SIZE_SZ; - } else if (inuse(p)) { - return chunksize(p) - SIZE_SZ; - } - } - return 0; -} - -/* - ------------------------------ mallinfo ------------------------------ -*/ - -struct mallinfo mALLINFo() { - mstate av = get_malloc_state(); - struct mallinfo mi; - int i; - mbinptr b; - mchunkptr p; - INTERNAL_SIZE_T avail; - INTERNAL_SIZE_T fastavail; - int nblocks; - int nfastblocks; - - /* Ensure initialization */ - if (av->top == 0) { - malloc_consolidate(av); - } - - check_malloc_state(); - - /* Account for top */ - avail = chunksize(av->top); - nblocks = 1; /* top always exists */ - - /* traverse fastbins */ - 
nfastblocks = 0; - fastavail = 0; - - for (i = 0; NFASTBINS - i > 0; ++i) { - for (p = av->fastbins[i]; p != 0; p = p->fd) { - ++nfastblocks; - fastavail += chunksize(p); - } - } - - avail += fastavail; - - /* traverse regular bins */ - for (i = 1; i < NBINS; ++i) { - b = bin_at(av, i); - for (p = last(b); p != b; p = p->bk) { - ++nblocks; - avail += chunksize(p); - } - } - - mi.smblks = nfastblocks; - mi.ordblks = nblocks; - mi.fordblks = avail; - mi.uordblks = av->sbrked_mem - avail; - mi.arena = av->sbrked_mem; - mi.hblks = av->n_mmaps; - mi.hblkhd = av->mmapped_mem; - mi.fsmblks = fastavail; - mi.keepcost = chunksize(av->top); - mi.usmblks = av->max_total_mem; - return mi; -} - -/* - ------------------------------ malloc_stats ------------------------------ -*/ - -void mSTATs(void) { - struct mallinfo mi = mALLINFo(); - -#ifdef WIN32 - { - CHUNK_SIZE_T free, reserved, committed; - vminfo(&free, &reserved, &committed); - fprintf(stderr, "free bytes = %10lu\n", - free); - fprintf(stderr, "reserved bytes = %10lu\n", - reserved); - fprintf(stderr, "committed bytes = %10lu\n", - committed); - } -#endif - - fprintf(stderr, "max system bytes = %10lu\n", - (CHUNK_SIZE_T)(mi.usmblks)); - fprintf(stderr, "system bytes = %10lu\n", - (CHUNK_SIZE_T)(mi.arena + mi.hblkhd)); - fprintf(stderr, "in use bytes = %10lu\n", - (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd)); - -#ifdef WIN32 - { - CHUNK_SIZE_T kernel, user; - if (cpuinfo(TRUE, &kernel, &user)) { - fprintf(stderr, "kernel ms = %10lu\n", - kernel); - fprintf(stderr, "user ms = %10lu\n", - user); - } - } -#endif -} - -/* - ------------------------------ mallopt ------------------------------ -*/ - -#if __STD_C -int mALLOPt(int param_number, int value) -#else -int mALLOPt(param_number, value) -int param_number; -int value; -#endif -{ - mstate av = get_malloc_state(); - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - - switch (param_number) { - case M_MXFAST: - if (value >= 0 && value <= MAX_FAST_SIZE) { - set_max_fast(av, value); - return 1; - } else { - return 0; - } - - case M_TRIM_THRESHOLD: - av->trim_threshold = value; - return 1; - - case M_TOP_PAD: - av->top_pad = value; - return 1; - - case M_MMAP_THRESHOLD: - av->mmap_threshold = value; - return 1; - - case M_MMAP_MAX: -#if !HAVE_MMAP - if (value != 0) { - return 0; - } -#endif - av->n_mmaps_max = value; - return 1; - - default: - return 0; - } -} - -/* - -------------------- Alternative MORECORE functions -------------------- -*/ - -/* - General Requirements for MORECORE. - - The MORECORE function must have the following properties: - - If MORECORE_CONTIGUOUS is false: - - * MORECORE must allocate in multiples of pagesize. It will - only be called with arguments that are multiples of pagesize. - - * MORECORE(0) must return an address that is at least - MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) - - else (i.e. If MORECORE_CONTIGUOUS is true): - - * Consecutive calls to MORECORE with positive arguments - return increasing addresses, indicating that space has been - contiguously extended. - - * MORECORE need not allocate in multiples of pagesize. - Calls to MORECORE need not have args of multiples of pagesize. - - * MORECORE need not page-align. - - In either case: - - * MORECORE may allocate more memory than requested. (Or even less, - but this will generally result in a malloc failure.) - - * MORECORE must not allocate memory when given argument zero, but - instead return one past the end address of memory from previous - nonzero call. 
This malloc does NOT call MORECORE(0) - until at least one call with positive arguments is made, so - the initial value returned is not important. - - * Even though consecutive calls to MORECORE need not return contiguous - addresses, it must be OK for malloc'ed chunks to span multiple - regions in those cases where they do happen to be contiguous. - - * MORECORE need not handle negative arguments -- it may instead - just return MORECORE_FAILURE when given negative arguments. - Negative arguments are always multiples of pagesize. MORECORE - must not misinterpret negative args as large positive unsigned - args. You can suppress all such calls from even occurring by defining - MORECORE_CANNOT_TRIM. - - There is some variation across systems about the type of the - argument to sbrk/MORECORE. If size_t is unsigned, then it cannot - actually be size_t, because sbrk supports negative args, so it is - normally the signed type of the same width as size_t (sometimes - declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much - matter though. Internally, we use "long" as arguments, which should - work across all reasonable possibilities. - - Additionally, if MORECORE ever returns failure for a positive - request, and HAVE_MMAP is true, then mmap is used as a noncontiguous - system allocator. This is a useful backup strategy for systems with - holes in address spaces -- in this case sbrk cannot contiguously - expand the heap, but mmap may be able to map noncontiguous space. - - If you'd like mmap to ALWAYS be used, you can define MORECORE to be - a function that always returns MORECORE_FAILURE. - - Malloc only has limited ability to detect failures of MORECORE - to supply contiguous space when it says it can. In particular, - multithreaded programs that do not use locks may result in - race conditions across calls to MORECORE that result in gaps - that cannot be detected as such, and subsequent corruption. - - If you are using this malloc with something other than sbrk (or its - emulation) to supply memory regions, you probably want to set - MORECORE_CONTIGUOUS as false. As an example, here is a custom - allocator kindly contributed for pre-OSX macOS. It uses virtually - but not necessarily physically contiguous non-paged memory (locked - in, present and won't get swapped out). You can use it by - uncommenting this section, adding some #includes, and setting up the - appropriate defines above: - - #define MORECORE osMoreCore - #define MORECORE_CONTIGUOUS 0 - - There is also a shutdown routine that should somehow be called for - cleanup upon program exit.
- - #define MAX_POOL_ENTRIES 100 - #define MINIMUM_MORECORE_SIZE (64 * 1024) - static int next_os_pool; - void *our_os_pools[MAX_POOL_ENTRIES]; - - void *osMoreCore(int size) - { - void *ptr = 0; - static void *sbrk_top = 0; - - if (size > 0) - { - if (size < MINIMUM_MORECORE_SIZE) - size = MINIMUM_MORECORE_SIZE; - if (CurrentExecutionLevel() == kTaskLevel) - ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); - if (ptr == 0) - { - return (void *) MORECORE_FAILURE; - } - // save ptrs so they can be freed during cleanup - our_os_pools[next_os_pool] = ptr; - next_os_pool++; - ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); - sbrk_top = (char *) ptr + size; - return ptr; - } - else if (size < 0) - { - // we don't currently support shrink behavior - return (void *) MORECORE_FAILURE; - } - else - { - return sbrk_top; - } - } - - // cleanup any allocated memory pools - // called as last thing before shutting down driver - - void osCleanupMem(void) - { - void **ptr; - - for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) - if (*ptr) - { - PoolDeallocate(*ptr); - *ptr = 0; - } - } - -*/ - -/* - -------------------------------------------------------------- - - Emulation of sbrk for win32. - Donated by J. Walter . - For additional information about this code, and malloc on Win32, see - http://www.genesys-e.de/jwalter/ -*/ - -#ifdef WIN32 - -#ifdef _DEBUG -/* #define TRACE */ -#endif - -/* Support for USE_MALLOC_LOCK */ -#ifdef USE_MALLOC_LOCK - -/* Wait for spin lock */ -static int slwait(int *sl) { - while (InterlockedCompareExchange((void **)sl, (void *)1, (void *)0) != 0) { - Sleep(0); - } - return 0; -} - -/* Release spin lock */ -static int slrelease(int *sl) { - InterlockedExchange(sl, 0); - return 0; -} - -#ifdef NEEDED -/* Spin lock for emulation code */ -static int g_sl; -#endif - -#endif /* USE_MALLOC_LOCK */ - -/* getpagesize for windows */ -static long getpagesize(void) { - static long g_pagesize = 0; - if (!g_pagesize) { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - g_pagesize = system_info.dwPageSize; - } - return g_pagesize; -} -static long getregionsize(void) { - static long g_regionsize = 0; - if (!g_regionsize) { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - g_regionsize = system_info.dwAllocationGranularity; - } - return g_regionsize; -} - -/* A region list entry */ -typedef struct _region_list_entry { - void *top_allocated; - void *top_committed; - void *top_reserved; - long reserve_size; - struct _region_list_entry *previous; -} region_list_entry; - -/* Allocate and link a region entry in the region list */ -static int region_list_append(region_list_entry **last, void *base_reserved, long reserve_size) { - region_list_entry *next = HeapAlloc(GetProcessHeap(), 0, sizeof(region_list_entry)); - if (!next) { - return FALSE; - } - next->top_allocated = (char *)base_reserved; - next->top_committed = (char *)base_reserved; - next->top_reserved = (char *)base_reserved + reserve_size; - next->reserve_size = reserve_size; - next->previous = *last; - *last = next; - return TRUE; -} -/* Free and unlink the last region entry from the region list */ -static int region_list_remove(region_list_entry **last) { - region_list_entry *previous = (*last)->previous; - if (!HeapFree(GetProcessHeap(), sizeof(region_list_entry), *last)) { - return FALSE; - } - *last = previous; - return TRUE; -} - -#define CEIL(size, to) (((size) + (to) - 1) & ~((to) - 1)) -#define FLOOR(size, to) ((size) & ~((to) - 1)) - -#define SBRK_SCALE 0 -/* 
#define SBRK_SCALE 1 */ -/* #define SBRK_SCALE 2 */ -/* #define SBRK_SCALE 4 */ - -/* sbrk for windows */ -static void *sbrk(long size) { - static long g_pagesize, g_my_pagesize; - static long g_regionsize, g_my_regionsize; - static region_list_entry *g_last; - void *result = (void *)MORECORE_FAILURE; -#ifdef TRACE - printf("sbrk %d\n", size); -#endif -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Wait for spin lock */ - slwait(&g_sl); -#endif - /* First time initialization */ - if (!g_pagesize) { - g_pagesize = getpagesize(); - g_my_pagesize = g_pagesize << SBRK_SCALE; - } - if (!g_regionsize) { - g_regionsize = getregionsize(); - g_my_regionsize = g_regionsize << SBRK_SCALE; - } - if (!g_last) { - if (!region_list_append(&g_last, 0, 0)) { - goto sbrk_exit; - } - } - /* Assert invariants */ - assert(g_last); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && - g_last->top_allocated <= g_last->top_committed); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && - g_last->top_committed <= g_last->top_reserved && - (unsigned)g_last->top_committed % g_pagesize == 0); - assert((unsigned)g_last->top_reserved % g_regionsize == 0); - assert((unsigned)g_last->reserve_size % g_regionsize == 0); - /* Allocation requested? */ - if (size >= 0) { - /* Allocation size is the requested size */ - long allocate_size = size; - /* Compute the size to commit */ - long to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; - /* Do we reach the commit limit? */ - if (to_commit > 0) { - /* Round size to commit */ - long commit_size = CEIL(to_commit, g_my_pagesize); - /* Compute the size to reserve */ - long to_reserve = (char *)g_last->top_committed + commit_size - (char *)g_last->top_reserved; - /* Do we reach the reserve limit? */ - if (to_reserve > 0) { - /* Compute the remaining size to commit in the current region */ - long remaining_commit_size = (char *)g_last->top_reserved - (char *)g_last->top_committed; - if (remaining_commit_size > 0) { - /* Assert preconditions */ - assert((unsigned)g_last->top_committed % g_pagesize == 0); - assert(0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); - { - /* Commit this */ - void *base_committed = VirtualAlloc(g_last->top_committed, remaining_commit_size, - MEM_COMMIT, PAGE_READWRITE); - /* Check returned pointer for consistency */ - if (base_committed != g_last->top_committed) { - goto sbrk_exit; - } - /* Assert postconditions */ - assert((unsigned)base_committed % g_pagesize == 0); -#ifdef TRACE - printf("Commit %p %d\n", base_committed, remaining_commit_size); -#endif - /* Adjust the regions commit top */ - g_last->top_committed = (char *)base_committed + remaining_commit_size; - } - } - { - /* Now we are going to search and reserve. 
*/ - int contiguous = -1; - int found = FALSE; - MEMORY_BASIC_INFORMATION memory_info; - void *base_reserved; - long reserve_size; - do { - /* Assume contiguous memory */ - contiguous = TRUE; - /* Round size to reserve */ - reserve_size = CEIL(to_reserve, g_my_regionsize); - /* Start with the current region's top */ - memory_info.BaseAddress = g_last->top_reserved; - /* Assert preconditions */ - assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); - assert(0 < reserve_size && reserve_size % g_regionsize == 0); - while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { - /* Assert postconditions */ - assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); -#ifdef TRACE - printf("Query %p %d %s\n", memory_info.BaseAddress, memory_info.RegionSize, - memory_info.State == MEM_FREE ? "FREE" : (memory_info.State == MEM_RESERVE ? "RESERVED" : (memory_info.State == MEM_COMMIT ? "COMMITTED" : "?"))); -#endif - /* Region is free, well aligned and big enough: we are done */ - if (memory_info.State == MEM_FREE && - (unsigned)memory_info.BaseAddress % g_regionsize == 0 && - memory_info.RegionSize >= (unsigned)reserve_size) { - found = TRUE; - break; - } - /* From now on we can't get contiguous memory! */ - contiguous = FALSE; - /* Recompute size to reserve */ - reserve_size = CEIL(allocate_size, g_my_regionsize); - memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; - /* Assert preconditions */ - assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); - assert(0 < reserve_size && reserve_size % g_regionsize == 0); - } - /* Search failed? */ - if (!found) { - goto sbrk_exit; - } - /* Assert preconditions */ - assert((unsigned)memory_info.BaseAddress % g_regionsize == 0); - assert(0 < reserve_size && reserve_size % g_regionsize == 0); - /* Try to reserve this */ - base_reserved = VirtualAlloc(memory_info.BaseAddress, reserve_size, - MEM_RESERVE, PAGE_NOACCESS); - if (!base_reserved) { - int rc = GetLastError(); - if (rc != ERROR_INVALID_ADDRESS) { - goto sbrk_exit; - } - } - /* A null pointer signals (hopefully) a race condition with another thread. */ - /* In this case, we try again. */ - } while (!base_reserved); - /* Check returned pointer for consistency */ - if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress) { - goto sbrk_exit; - } - /* Assert postconditions */ - assert((unsigned)base_reserved % g_regionsize == 0); -#ifdef TRACE - printf("Reserve %p %d\n", base_reserved, reserve_size); -#endif - /* Did we get contiguous memory? */ - if (contiguous) { - long start_size = (char *)g_last->top_committed - (char *)g_last->top_allocated; - /* Adjust allocation size */ - allocate_size -= start_size; - /* Adjust the regions allocation top */ - g_last->top_allocated = g_last->top_committed; - /* Recompute the size to commit */ - to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; - /* Round size to commit */ - commit_size = CEIL(to_commit, g_my_pagesize); - } - /* Append the new region to the list */ - if (!region_list_append(&g_last, base_reserved, reserve_size)) { - goto sbrk_exit; - } - /* Didn't we get contiguous memory? 
*/ - if (!contiguous) { - /* Recompute the size to commit */ - to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; - /* Round size to commit */ - commit_size = CEIL(to_commit, g_my_pagesize); - } - } - } - /* Assert preconditions */ - assert((unsigned)g_last->top_committed % g_pagesize == 0); - assert(0 < commit_size && commit_size % g_pagesize == 0); - { - /* Commit this */ - void *base_committed = VirtualAlloc(g_last->top_committed, commit_size, - MEM_COMMIT, PAGE_READWRITE); - /* Check returned pointer for consistency */ - if (base_committed != g_last->top_committed) { - goto sbrk_exit; - } - /* Assert postconditions */ - assert((unsigned)base_committed % g_pagesize == 0); -#ifdef TRACE - printf("Commit %p %d\n", base_committed, commit_size); -#endif - /* Adjust the regions commit top */ - g_last->top_committed = (char *)base_committed + commit_size; - } - } - /* Adjust the regions allocation top */ - g_last->top_allocated = (char *)g_last->top_allocated + allocate_size; - result = (char *)g_last->top_allocated - size; - /* Deallocation requested? */ - } else if (size < 0) { - long deallocate_size = -size; - /* As long as we have a region to release */ - while ((char *)g_last->top_allocated - deallocate_size < (char *)g_last->top_reserved - g_last->reserve_size) { - /* Get the size to release */ - long release_size = g_last->reserve_size; - /* Get the base address */ - void *base_reserved = (char *)g_last->top_reserved - release_size; - /* Assert preconditions */ - assert((unsigned)base_reserved % g_regionsize == 0); - assert(0 < release_size && release_size % g_regionsize == 0); - { - /* Release this */ - int rc = VirtualFree(base_reserved, 0, - MEM_RELEASE); - /* Check returned code for consistency */ - if (!rc) { - goto sbrk_exit; - } -#ifdef TRACE - printf("Release %p %d\n", base_reserved, release_size); -#endif - } - /* Adjust deallocation size */ - deallocate_size -= (char *)g_last->top_allocated - (char *)base_reserved; - /* Remove the old region from the list */ - if (!region_list_remove(&g_last)) { - goto sbrk_exit; - } - } - { - /* Compute the size to decommit */ - long to_decommit = (char *)g_last->top_committed - ((char *)g_last->top_allocated - deallocate_size); - if (to_decommit >= g_my_pagesize) { - /* Compute the size to decommit */ - long decommit_size = FLOOR(to_decommit, g_my_pagesize); - /* Compute the base address */ - void *base_committed = (char *)g_last->top_committed - decommit_size; - /* Assert preconditions */ - assert((unsigned)base_committed % g_pagesize == 0); - assert(0 < decommit_size && decommit_size % g_pagesize == 0); - { - /* Decommit this */ - int rc = VirtualFree((char *)base_committed, decommit_size, - MEM_DECOMMIT); - /* Check returned code for consistency */ - if (!rc) { - goto sbrk_exit; - } -#ifdef TRACE - printf("Decommit %p %d\n", base_committed, decommit_size); -#endif - } - /* Adjust deallocation size and regions commit and allocate top */ - deallocate_size -= (char *)g_last->top_allocated - (char *)base_committed; - g_last->top_committed = base_committed; - g_last->top_allocated = base_committed; - } - } - /* Adjust regions allocate top */ - g_last->top_allocated = (char *)g_last->top_allocated - deallocate_size; - /* Check for underflow */ - if ((char *)g_last->top_reserved - g_last->reserve_size > (char *)g_last->top_allocated || - g_last->top_allocated > g_last->top_committed) { - /* Adjust regions allocate top */ - g_last->top_allocated = (char *)g_last->top_reserved - g_last->reserve_size; - goto 
sbrk_exit; - } - result = g_last->top_allocated; - } - /* Assert invariants */ - assert(g_last); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && - g_last->top_allocated <= g_last->top_committed); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && - g_last->top_committed <= g_last->top_reserved && - (unsigned)g_last->top_committed % g_pagesize == 0); - assert((unsigned)g_last->top_reserved % g_regionsize == 0); - assert((unsigned)g_last->reserve_size % g_regionsize == 0); - -sbrk_exit: -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Release spin lock */ - slrelease(&g_sl); -#endif - return result; -} - -/* mmap for windows */ -static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg) { - static long g_pagesize; - static long g_regionsize; -#ifdef TRACE - printf("mmap %d\n", size); -#endif -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Wait for spin lock */ - slwait(&g_sl); -#endif - /* First time initialization */ - if (!g_pagesize) { - g_pagesize = getpagesize(); - } - if (!g_regionsize) { - g_regionsize = getregionsize(); - } - /* Assert preconditions */ - assert((unsigned)ptr % g_regionsize == 0); - assert(size % g_pagesize == 0); - /* Allocate this */ - ptr = VirtualAlloc(ptr, size, - MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE); - if (!ptr) { - ptr = (void *)MORECORE_FAILURE; - goto mmap_exit; - } - /* Assert postconditions */ - assert((unsigned)ptr % g_regionsize == 0); -#ifdef TRACE - printf("Commit %p %d\n", ptr, size); -#endif -mmap_exit: -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Release spin lock */ - slrelease(&g_sl); -#endif - return ptr; -} - -/* munmap for windows */ -static long munmap(void *ptr, long size) { - static long g_pagesize; - static long g_regionsize; - int rc = MUNMAP_FAILURE; -#ifdef TRACE - printf("munmap %p %d\n", ptr, size); -#endif -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Wait for spin lock */ - slwait(&g_sl); -#endif - /* First time initialization */ - if (!g_pagesize) { - g_pagesize = getpagesize(); - } - if (!g_regionsize) { - g_regionsize = getregionsize(); - } - /* Assert preconditions */ - assert((unsigned)ptr % g_regionsize == 0); - assert(size % g_pagesize == 0); - /* Free this */ - if (!VirtualFree(ptr, 0, - MEM_RELEASE)) { - goto munmap_exit; - } - rc = 0; -#ifdef TRACE - printf("Release %p %d\n", ptr, size); -#endif -munmap_exit: -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Release spin lock */ - slrelease(&g_sl); -#endif - return rc; -} - -static void vminfo(CHUNK_SIZE_T *free, CHUNK_SIZE_T *reserved, CHUNK_SIZE_T *committed) { - MEMORY_BASIC_INFORMATION memory_info; - memory_info.BaseAddress = 0; - *free = *reserved = *committed = 0; - while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { - switch (memory_info.State) { - case MEM_FREE: - *free += memory_info.RegionSize; - break; - case MEM_RESERVE: - *reserved += memory_info.RegionSize; - break; - case MEM_COMMIT: - *committed += memory_info.RegionSize; - break; - } - memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; - } -} - -static int cpuinfo(int whole, CHUNK_SIZE_T *kernel, CHUNK_SIZE_T *user) { - if (whole) { - __int64 creation64, exit64, kernel64, user64; - int rc = GetProcessTimes(GetCurrentProcess(), - (FILETIME *)&creation64, - (FILETIME *)&exit64, - (FILETIME *)&kernel64, - (FILETIME *)&user64); - if (!rc) { - *kernel = 0; - *user = 0; - return 
FALSE; - } - *kernel = (CHUNK_SIZE_T)(kernel64 / 10000); - *user = (CHUNK_SIZE_T)(user64 / 10000); - return TRUE; - } else { - __int64 creation64, exit64, kernel64, user64; - int rc = GetThreadTimes(GetCurrentThread(), - (FILETIME *)&creation64, - (FILETIME *)&exit64, - (FILETIME *)&kernel64, - (FILETIME *)&user64); - if (!rc) { - *kernel = 0; - *user = 0; - return FALSE; - } - *kernel = (CHUNK_SIZE_T)(kernel64 / 10000); - *user = (CHUNK_SIZE_T)(user64 / 10000); - return TRUE; - } -} - -#endif /* WIN32 */ - -/* ------------------------------------------------------------ -History: - V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) - * Fix malloc_state bitmap array misdeclaration - - V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) - * Allow tuning of FIRST_SORTED_BIN_SIZE - * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. - * Better detection and support for non-contiguousness of MORECORE. - Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger - * Bypass most of malloc if no frees. Thanks to Emery Berger. - * Fix freeing of old top non-contiguous chunk in sysmalloc. - * Raised default trim and map thresholds to 256K. - * Fix mmap-related #defines. Thanks to Lubos Lunak. - * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. - * Branch-free bin calculation - * Default trim and mmap thresholds now 256K. - - V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) - * Introduce independent_comalloc and independent_calloc. - Thanks to Michael Pachos for motivation and help. - * Make optional .h file available - * Allow > 2GB requests on 32bit systems. - * new WIN32 sbrk, mmap, munmap, lock code from . - Thanks also to Andreas Mueller , - and Anonymous. - * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for - helping test this.) - * memalign: check alignment arg - * realloc: don't try to shift chunks backwards, since this - leads to more fragmentation in some programs and doesn't - seem to help in any others. - * Collect all cases in malloc requiring system memory into sYSMALLOc - * Use mmap as backup to sbrk - * Place all internal state in malloc_state - * Introduce fastbins (although similar to 2.5.1) - * Many minor tunings and cosmetic improvements - * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK - * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS - Thanks to Tony E. Bennett and others. - * Include errno.h to support default failure action. - - V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) - * return null for negative arguments - * Added several WIN32 cleanups from Martin C. Fong - * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' - (e.g. WIN32 platforms) - * Cleanup header file inclusion for WIN32 platforms - * Cleanup code to avoid Microsoft Visual C++ compiler complaints - * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing - memory allocation routines - * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) - * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to - usage of 'assert' in non-WIN32 code - * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to - avoid infinite loop - * Always call 'fREe()' rather than 'free()' - - V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) - * Fixed ordering problem with boundary-stamping - - V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) - * Added pvalloc, as recommended by H.J.
Liu - * Added 64bit pointer support mainly from Wolfram Gloger - * Added anonymously donated WIN32 sbrk emulation - * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen - * malloc_extend_top: fix mask error that caused wastage after - foreign sbrks - * Add linux mremap support code from HJ Liu - - V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) - * Integrated most documentation with the code. - * Add support for mmap, with help from - Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Use last_remainder in more cases. - * Pack bins using idea from colin@nyx10.cs.du.edu - * Use ordered bins instead of best-fit threshold - * Eliminate block-local decls to simplify tracing and debugging. - * Support another case of realloc via move into top - * Fix error occurring when initial sbrk_base not word-aligned. - * Rely on page size for units instead of SBRK_UNIT to - avoid surprises about sbrk alignment conventions. - * Add mallinfo, mallopt. Thanks to Raymond Nijssen - (raymond@es.ele.tue.nl) for the suggestion. - * Add `pad' argument to malloc_trim and top_pad mallopt parameter. - * More precautions for cases where other routines call sbrk, - courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Added macros etc., allowing use in linux libc from - H.J. Lu (hjl@gnu.ai.mit.edu) - * Inverted this history list - - V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) - * Re-tuned and fixed to behave more nicely with V2.6.0 changes. - * Removed all preallocation code since under current scheme - the work required to undo bad preallocations exceeds - the work saved in good cases for most test programs. - * No longer use return list or unconsolidated bins since - no scheme using them consistently outperforms those that don't - given above changes. - * Use best fit for very large chunks to prevent some worst-cases. - * Added some support for debugging - - V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) - * Removed footers when chunks are in use. Thanks to - Paul Wilson (wilson@cs.texas.edu) for the suggestion. - - V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) - * Added malloc_trim, with help from Wolfram Gloger - (wmglo@Dent.MED.Uni-Muenchen.DE). - - V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) - - V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) - * realloc: try to expand in both directions - * malloc: swap order of clean-bin strategy; - * realloc: only conditionally expand backwards - * Try not to scavenge used bins - * Use bin counts as a guide to preallocation - * Occasionally bin return list chunks in first scan - * Add a few optimizations from colin@nyx10.cs.du.edu - - V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) - * faster bin computation & slightly different binning - * merged all consolidations to one part of malloc proper - (eliminating old malloc_find_space & malloc_clean_bin) - * Scan 2 returns chunks (not just 1) - * Propagate failure in realloc if malloc returns 0 - * Add stuff to allow compilation on non-ANSI compilers - from kpv@research.att.com - - V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) - * removed potential for odd address access in prev_chunk - * removed dependency on getpagesize.h - * misc cosmetics and a bit more internal documentation - * anticosmetics: mangled names in macros to evade debugger strangeness - * tested on sparc, hp-700, dec-mips, rs6000 - with gcc & native cc (hp, dec only) allowing - Detlefs & Zorn comparison study (in SIGPLAN Notices.)
- - Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) - * Based loosely on libg++-1.2X malloc. (It retains some of the overall - structure of old version, but most details differ.) - -*/ - -#ifdef __cplusplus -}; /* end of extern "C" */ -#endif - -#endif /* MALLOC_270_H */