From 65a554bc65f21b84f1785f66e1155be8a3aa9159 Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Mon, 27 Apr 2026 13:25:54 -0400 Subject: [PATCH 1/4] add back deleted files --- .clang-format | 229 + .gitignore | 10 +- ocean/impulse_wars/CMakeLists.txt | 138 + ocean/impulse_wars/Makefile | 61 + ocean/impulse_wars/README.md | 12 + ocean/impulse_wars/binding.h | 177 + ocean/impulse_wars/helpers.h | 4 +- ocean/impulse_wars/impulse_wars.py | 181 + ocean/impulse_wars/pyproject.toml | 25 + ocean/impulse_wars/types.h | 2 +- vendor/cc_array.h | 1410 +++++++ vendor/cc_common.h | 75 + vendor/dlmalloc.h | 6264 ++++++++++++++++++++++++++++ vendor/rlights.h | 2 +- 14 files changed, 8581 insertions(+), 9 deletions(-) create mode 100644 .clang-format create mode 100644 ocean/impulse_wars/CMakeLists.txt create mode 100644 ocean/impulse_wars/Makefile create mode 100644 ocean/impulse_wars/README.md create mode 100644 ocean/impulse_wars/binding.h create mode 100644 ocean/impulse_wars/impulse_wars.py create mode 100644 ocean/impulse_wars/pyproject.toml create mode 100644 vendor/cc_array.h create mode 100644 vendor/cc_common.h create mode 100644 vendor/dlmalloc.h diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..d9ba19d3de --- /dev/null +++ b/.clang-format @@ -0,0 +1,229 @@ +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: BlockIndent +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: false +BinPackParameters: false +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true 
+BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeClosingBracket: Always +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Custom +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 0 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: false +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: true +InsertNewlineAtEOF: false +InsertTrailingCommas: Wrapped +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: Never +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true 
+SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... + diff --git a/.gitignore b/.gitignore index b01c266e47..5b31358da0 100644 --- a/.gitignore +++ b/.gitignore @@ -162,11 +162,11 @@ raylib*/ box2d*/ # Temp Impulse Wars files -pufferlib/ocean/impulse_wars/*-debug/ -pufferlib/ocean/impulse_wars/*-release/ -pufferlib/ocean/impulse_wars/debug-*/ -pufferlib/ocean/impulse_wars/release-*/ -pufferlib/ocean/impulse_wars/benchmark/ +ocean/impulse_wars/*-debug/ +ocean/impulse_wars/*-release/ +ocean/impulse_wars/debug-*/ +ocean/impulse_wars/release-*/ +ocean/impulse_wars/benchmark/ # Data resources/drive/data/* diff --git a/ocean/impulse_wars/CMakeLists.txt b/ocean/impulse_wars/CMakeLists.txt new file mode 100644 index 0000000000..55f49a73f6 --- /dev/null +++ b/ocean/impulse_wars/CMakeLists.txt @@ -0,0 +1,138 @@ +# 3.22 was released on Nov 2021, should be widely available +cmake_minimum_required(VERSION 3.22) +include(FetchContent) + +project( + impulse-wars + DESCRIPTION "Impulse Wars" + LANGUAGES C +) + +message(INFO " C Compiler: ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} ${CMAKE_C_COMPILER_ID}") + +# use ccache if available to speed up subsequent builds +find_program(CCACHE_FOUND "ccache") +if(CCACHE_FOUND) + set(CMAKE_C_COMPILER_LAUNCHER "ccache") +endif() + +# enable some C23 features, the c2x standard is a WIP standard supported +# by gcc since 9 (May 2019) and clang since 9 (Sep 2019) +set(CMAKE_C_FLAGS_INIT " -std=c2x") + +# force position independent code everywhere to prevent some rare +# linker errors depending on what compiler is used +add_compile_options("-fPIC") + +if(CMAKE_BUILD_TYPE MATCHES Debug) + # leak detection doesn't work correctly when the code is called by + # Python, so disable it + if(DEFINED BUILD_PYTHON_MODULE) + add_compile_options("-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow") + add_link_options("-shared-libasan" "-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow") + else() + add_compile_options("-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow,leak") + add_link_options("-fno-omit-frame-pointer" "-fsanitize=address,undefined,bounds,pointer-overflow,leak") + endif() + + # mold is an extremely fast linker, use it if available + # only use mold in debug mode, link time optimization currently doesn't + # work with mold and provides large speedups + find_program(MOLD_FOUND "mold") + if(MOLD_FOUND) + add_link_options("-fuse-ld=mold") + endif() +else() + add_compile_options("-flto" "-fno-math-errno") + if (NOT DEFINED EMSCRIPTEN) + # emscripten doesn't support -march=native, it doesn't make sense + # for WASM anyway + add_compile_options("-march=native") + else() + # tell emscripten to generate an HTML file that can be used to + # test the WASM, and ensure necessary code is transformed to be + # async friendly; it allows the game to be run much more smoothly + set(CMAKE_EXECUTABLE_SUFFIX ".html") + add_link_options("-sASYNCIFY") + endif() + # ensure the linker used is from the same compiler toolchain, 
or else
+    # link time optimization will probably fail; if we're using
+    # emscripten it will use its own linker
+    if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT DEFINED EMSCRIPTEN)
+        add_link_options("-fuse-ld=lld")
+    endif()
+
+    # add_compile_options("-pg")
+    # add_link_options("-pg")
+endif()
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+set(FETCHCONTENT_QUIET FALSE)
+
+# fetch and configure dependencies
+FetchContent_Declare(
+    raylib
+    URL https://github.com/raysan5/raylib/archive/c1ab645ca298a2801097931d1079b10ff7eb9df8.zip # 5.5
+)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Statically link raylib" FORCE)
+set(WITH_PIC ON CACHE BOOL "Compile static library as position-independent code" FORCE)
+set(CUSTOMIZE_BUILD ON CACHE BOOL "Customize raylib build settings" FORCE)
+set(USE_AUDIO OFF CACHE BOOL "Don't build unused audio module" FORCE)
+FetchContent_MakeAvailable(raylib)
+
+# if box2d is fetched first, installing the built Python module will fail
+# for reasons unbeknownst to mere mortals
+# maybe due to install prefix shenanigans?
+FetchContent_Declare(
+    box2d
+    URL https://github.com/capnspacehook/box2d/archive/df25d747be0ab2fd9425eece022d2ec897c2028d.zip
+)
+set(BOX2D_ENABLE_SIMD ON CACHE BOOL "Enable SIMD math (faster)" FORCE)
+set(BOX2D_AVX2 ON CACHE BOOL "Enable AVX2 (faster)" FORCE)
+add_compile_definitions(B2_MAX_WORLDS=65534)
+FetchContent_MakeAvailable(box2d)
+# this is set to off by box2d to enable cross platform determinism, but
+# I don't care about that and want the small speedup instead
+target_compile_options(box2d PRIVATE "-ffp-contract=fast")
+
+function(configure_target target_name)
+    target_include_directories(
+        ${target_name} PRIVATE
+        "${CMAKE_CURRENT_SOURCE_DIR}"
+        "${CMAKE_CURRENT_SOURCE_DIR}/../../vendor"
+    )
+
+    # Mark box2d as a system include directory to suppress warnings from it
+    target_include_directories(${target_name} SYSTEM PRIVATE "${box2d_SOURCE_DIR}/src")
+
+    target_link_libraries(${target_name} PRIVATE raylib box2d)
+
+    target_compile_options(${target_name} PRIVATE
+        "-Werror" "-Wall" "-Wextra" "-Wpedantic"
+        "-Wno-implicit-fallthrough" "-Wno-variadic-macros" "-Wno-strict-prototypes" "-Wno-gnu-statement-expression"
+    )
+endfunction()
+
+if(DEFINED BUILD_PYTHON_MODULE)
+    find_package(
+        Python
+        COMPONENTS Interpreter Development.Module NumPy
+        REQUIRED
+    )
+
+    python_add_library(binding MODULE binding.c WITH_SOABI)
+
+    target_include_directories(binding PRIVATE
+        ${Python_NumPy_INCLUDE_DIRS}
+    )
+
+    configure_target(binding)
+
+    install(TARGETS binding DESTINATION .)
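+
+# standalone C builds of the demo and benchmark; these are what the
+# Makefile's debug-demo/release-demo and benchmark targets build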
+elseif(DEFINED BUILD_DEMO) + add_executable(demo "${CMAKE_CURRENT_SOURCE_DIR}/impulse_wars.c") + configure_target(demo) +elseif(DEFINED BUILD_BENCHMARK) + add_executable(benchmark "${CMAKE_CURRENT_SOURCE_DIR}/benchmark.c") + configure_target(benchmark) +endif() diff --git a/ocean/impulse_wars/Makefile b/ocean/impulse_wars/Makefile new file mode 100644 index 0000000000..ce593669da --- /dev/null +++ b/ocean/impulse_wars/Makefile @@ -0,0 +1,61 @@ +RELEASE_PYTHON_MODULE_DIR := python-module-release +DEBUG_PYTHON_MODULE_DIR := python-module-debug +DEBUG_DIR := debug-demo +RELEASE_DIR := release-demo +RELEASE_WEB_DIR := release-demo-web +BENCHMARK_DIR := benchmark + +DEBUG_BUILD_TYPE := Debug +RELEASE_BUILD_TYPE := Release + +# install build dependencies if this is a fresh build, Python won't +# install build dependencies when --no-build-isolation is passed +# build with no isolation so that builds can be cached and/or incremental + +# build Python module in release mode +.PHONY: python-module-release +python-module-release: + @test -d $(RELEASE_PYTHON_MODULE_DIR) || pip install scikit-build-core autopxd2 cython + @pip install --no-build-isolation --config-settings=editable.rebuild=true -Cbuild-dir=$(RELEASE_PYTHON_MODULE_DIR) -v . + +# build Python module in debug mode +.PHONY: python-module-debug +python-module-debug: + @test -d $(DEBUG_PYTHON_MODULE_DIR) || pip install scikit-build-core autopxd2 cython + @pip install --no-build-isolation --config-settings=editable.rebuild=true --config-settings=cmake.build-type="Debug" -Cbuild-dir=$(DEBUG_PYTHON_MODULE_DIR) -v . + +# build C demo in debug mode +.PHONY: debug-demo +debug-demo: + @mkdir -p $(DEBUG_DIR) + @cd $(DEBUG_DIR) && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=$(DEBUG_BUILD_TYPE) -DBUILD_DEMO=true -DCMAKE_C_COMPILER=clang-20 .. && \ + cmake --build . + +# build C demo in release mode +.PHONY: release-demo +release-demo: + @mkdir -p $(RELEASE_DIR) + @cd $(RELEASE_DIR) && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=$(RELEASE_BUILD_TYPE) -DBUILD_DEMO=true -DCMAKE_C_COMPILER=clang-20 .. && \ + cmake --build . + +# build C demo in release mode for web +.PHONY: release-demo-web +release-demo-web: + @mkdir -p $(RELEASE_WEB_DIR) + @cd $(RELEASE_WEB_DIR) && \ + emcmake cmake -GNinja -DCMAKE_BUILD_TYPE=$(RELEASE_BUILD_TYPE) -DPLATFORM=Web -DBUILD_DEMO=true .. && \ + cmake --build . + +# build C benchmark +.PHONY: benchmark +benchmark: + @mkdir -p $(BENCHMARK_DIR) + @cd $(BENCHMARK_DIR) && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=$(RELEASE_BUILD_TYPE) -DBUILD_BENCHMARK=true -DCMAKE_C_COMPILER=clang-20 .. && \ + cmake --build . + +.PHONY: clean +clean: + @rm -rf build $(RELEASE_PYTHON_MODULE_DIR) $(DEBUG_PYTHON_MODULE_DIR) $(DEBUG_DIR) $(RELEASE_DIR) $(RELEASE_WEB_DIR) $(BENCHMARK_DIR) diff --git a/ocean/impulse_wars/README.md b/ocean/impulse_wars/README.md new file mode 100644 index 0000000000..accca74381 --- /dev/null +++ b/ocean/impulse_wars/README.md @@ -0,0 +1,12 @@ +# Impulse Wars + +To build, you need to have the following: +- cmake +- make +- ninja +- raylib required deps installed: https://github.com/raysan5/raylib/wiki/Working-on-GNU-Linux + +Run `make && cp python-module-release/binding.*.so .` to build the python module in release mode. +`puffer_impulse_wars` env should now be trainable. + +When watching evaluations, you need to set all instances of `is_training = False` and `render = True` in the config file. 
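+
+For example, a typical release build looks like this (a sketch; `make` defaults to the `python-module-release` target, and the paths assume the commands are run from `ocean/impulse_wars/`):
+
+```sh
+# build the Python module in release mode
+make
+# copy the built extension next to the env code so Python can import it
+cp python-module-release/binding.*.so .
+```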
diff --git a/ocean/impulse_wars/binding.h b/ocean/impulse_wars/binding.h
new file mode 100644
index 0000000000..28b429773b
--- /dev/null
+++ b/ocean/impulse_wars/binding.h
@@ -0,0 +1,177 @@
+#include <Python.h>
+
+#include "env.h"
+
+static PyObject *get_consts(PyObject *self, PyObject *args);
+
+#define Env iwEnv
+#define MY_SHARED
+#define MY_METHODS {"get_consts", get_consts, METH_VARARGS, "Get constants"}
+
+#include "../env_binding.h"
+
+// set a key in a Python dict, bailing out with an exception set on failure
+#define setDictVal(dict, key, val) \
+    if (PyDict_SetItemString(dict, key, PyLong_FromLong(val)) < 0) { \
+        PyErr_SetString(PyExc_RuntimeError, "Failed to set " key " in dict"); \
+        return NULL; \
+    }
+
+static PyObject *get_consts(PyObject *self, PyObject *args) {
+    PyObject *dronesArg = PyTuple_GetItem(args, 0);
+    if (!PyObject_TypeCheck(dronesArg, &PyLong_Type)) {
+        PyErr_SetString(PyExc_TypeError, "num_drones must be an integer");
+        return NULL;
+    }
+    const uint8_t numDrones = (uint8_t)PyLong_AsLong(dronesArg);
+
+    PyObject *dict = PyDict_New();
+    if (PyErr_Occurred()) {
+        return NULL;
+    }
+
+    const uint16_t droneObsOffset = ENEMY_DRONE_OBS_OFFSET + ((numDrones - 1) * ENEMY_DRONE_OBS_SIZE);
+
+    setDictVal(dict, "obsBytes", obsBytes(numDrones));
+    setDictVal(dict, "mapObsSize", MAP_OBS_SIZE);
+    setDictVal(dict, "discreteObsSize", discreteObsSize(numDrones));
+    setDictVal(dict, "continuousObsSize", continuousObsSize(numDrones));
+    setDictVal(dict, "continuousObsBytes", continuousObsSize(numDrones) * sizeof(float));
+    setDictVal(dict, "wallTypes", NUM_WALL_TYPES);
+    setDictVal(dict, "weaponTypes", NUM_WEAPONS + 1);
+    setDictVal(dict, "mapObsRows", MAP_OBS_ROWS);
+    setDictVal(dict, "mapObsColumns", MAP_OBS_COLUMNS);
+    setDictVal(dict, "continuousObsOffset", alignedSize(MAP_OBS_SIZE, sizeof(float)));
+    setDictVal(dict, "numNearWallObs", NUM_NEAR_WALL_OBS);
+    setDictVal(dict, "nearWallTypesObsOffset", NEAR_WALL_TYPES_OBS_OFFSET);
+    setDictVal(dict, "nearWallPosObsSize", NEAR_WALL_POS_OBS_SIZE);
+    setDictVal(dict, "nearWallObsSize", NEAR_WALL_OBS_SIZE);
+    setDictVal(dict, "nearWallPosObsOffset", NEAR_WALL_POS_OBS_OFFSET);
+    setDictVal(dict, "numFloatingWallObs", NUM_FLOATING_WALL_OBS);
+    setDictVal(dict, "floatingWallTypesObsOffset", FLOATING_WALL_TYPES_OBS_OFFSET);
+    setDictVal(dict, "floatingWallInfoObsSize", FLOATING_WALL_INFO_OBS_SIZE);
+    setDictVal(dict, "floatingWallObsSize", FLOATING_WALL_OBS_SIZE);
+    setDictVal(dict, "floatingWallInfoObsOffset", FLOATING_WALL_INFO_OBS_OFFSET);
+    setDictVal(dict, "numWeaponPickupObs", NUM_WEAPON_PICKUP_OBS);
+    setDictVal(dict, "weaponPickupTypesObsOffset", WEAPON_PICKUP_WEAPONS_OBS_OFFSET);
+    setDictVal(dict, "weaponPickupPosObsSize", WEAPON_PICKUP_POS_OBS_SIZE);
+    setDictVal(dict, "weaponPickupObsSize", WEAPON_PICKUP_OBS_SIZE);
+    setDictVal(dict, "weaponPickupPosObsOffset", WEAPON_PICKUP_POS_OBS_OFFSET);
+    setDictVal(dict, "numProjectileObs", NUM_PROJECTILE_OBS);
+    setDictVal(dict, "projectileDroneObsOffset", PROJECTILE_DRONE_OBS_OFFSET);
+    setDictVal(dict, "projectileTypesObsOffset", PROJECTILE_WEAPONS_OBS_OFFSET);
+    setDictVal(dict, "projectileInfoObsSize", PROJECTILE_INFO_OBS_SIZE);
+    setDictVal(dict, "projectileObsSize", PROJECTILE_OBS_SIZE);
+    setDictVal(dict, "projectileInfoObsOffset", PROJECTILE_INFO_OBS_OFFSET);
+    setDictVal(dict, "enemyDroneWeaponsObsOffset", ENEMY_DRONE_WEAPONS_OBS_OFFSET);
+    setDictVal(dict, "enemyDroneObsOffset", ENEMY_DRONE_OBS_OFFSET);
+    setDictVal(dict, "enemyDroneObsSize", ENEMY_DRONE_OBS_SIZE);
+    setDictVal(dict, "droneObsOffset", droneObsOffset);
+    setDictVal(dict, "droneObsSize", 
DRONE_OBS_SIZE); + setDictVal(dict, "miscObsSize", MISC_OBS_SIZE); + setDictVal(dict, "miscObsOffset", droneObsOffset + DRONE_OBS_SIZE); + + setDictVal(dict, "maxDrones", MAX_DRONES); + setDictVal(dict, "contActionsSize", CONTINUOUS_ACTION_SIZE); + + return dict; +} + +static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { + VecEnv *ve = unpack_vecenv(args); + initMaps(ve->envs[0]); + + for (uint16_t i = 0; i < ve->num_envs; i++) { + iwEnv *e = (iwEnv *)ve->envs[i]; + setupEnv(e); + } + + return Py_None; +} + +static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { + initEnv( + e, + (uint8_t)unpack(kwargs, "num_drones"), + (uint8_t)unpack(kwargs, "num_agents"), + (int8_t)unpack(kwargs, "map_idx"), + (uint64_t)unpack(kwargs, "seed"), + (bool)unpack(kwargs, "enable_teams"), + (bool)unpack(kwargs, "sitting_duck"), + (bool)unpack(kwargs, "is_training"), + (bool)unpack(kwargs, "continuous") + ); + setRewards( + e, + (float)unpack(kwargs, "reward_win"), + (float)unpack(kwargs, "reward_self_kill"), + (float)unpack(kwargs, "reward_enemy_death"), + (float)unpack(kwargs, "reward_enemy_kill"), + 0.0f, // teammate death punishment + 0.0f, // teammate kill punishment + (float)unpack(kwargs, "reward_death"), + (float)unpack(kwargs, "reward_energy_emptied"), + (float)unpack(kwargs, "reward_weapon_pickup"), + (float)unpack(kwargs, "reward_shield_break"), + (float)unpack(kwargs, "reward_shot_hit_coef"), + (float)unpack(kwargs, "reward_explosion_hit_coef") + ); + return 0; +} + +#define _LOG_BUF_SIZE 128 + +char *droneLog(char *buf, const uint8_t droneIdx, const char *name) { + snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s", droneIdx, name); + return buf; +} + +char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, const char *name) { + snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s_%s", droneIdx, weaponNames[weaponIdx], name); + return buf; +} + +static int my_log(PyObject *dict, Log *log) { + assign_to_dict(dict, "episode_length", log->length); + assign_to_dict(dict, "ties", log->ties); + + assign_to_dict(dict, "perf", log->stats[0].wins); + assign_to_dict(dict, "score", log->stats[0].wins); + + char buf[_LOG_BUF_SIZE] = {0}; + for (uint8_t i = 0; i < MAX_DRONES; i++) { + assign_to_dict(dict, droneLog(buf, i, "returns"), log->stats[i].returns); + assign_to_dict(dict, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); + assign_to_dict(dict, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); + assign_to_dict(dict, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); + assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); + assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); + assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); + assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); + assign_to_dict(dict, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); + assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); + assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); + assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); + assign_to_dict(dict, droneLog(buf, i, "wins"), log->stats[i].wins); + + // useful for debugging weapon balance, but really slows down + // sweeps due to adding a ton of extra logging data + // + // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { + // assign_to_dict(dict, 
weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); + // assign_to_dict(dict, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); + // } + + assign_to_dict(dict, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); + assign_to_dict(dict, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); + assign_to_dict(dict, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); + assign_to_dict(dict, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); + assign_to_dict(dict, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); + assign_to_dict(dict, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); + } + + return 0; +} diff --git a/ocean/impulse_wars/helpers.h b/ocean/impulse_wars/helpers.h index 1692d6b9bd..7fb6a8305a 100644 --- a/ocean/impulse_wars/helpers.h +++ b/ocean/impulse_wars/helpers.h @@ -8,7 +8,7 @@ #include "box2d/box2d.h" -#include "include/cc_array.h" +#include "cc_array.h" #ifndef NDEBUG #define ON_ERROR __builtin_trap() @@ -121,7 +121,7 @@ #define fastFree(ptr) free(ptr) #define fastFreeFn free #else -#include "include/dlmalloc.h" +#include "dlmalloc.h" #define fastMalloc(size) dlmalloc(size) #define fastMallocFn dlmalloc #define fastCalloc(nmemb, size) dlcalloc(nmemb, size) diff --git a/ocean/impulse_wars/impulse_wars.py b/ocean/impulse_wars/impulse_wars.py new file mode 100644 index 0000000000..6fc2f5d27e --- /dev/null +++ b/ocean/impulse_wars/impulse_wars.py @@ -0,0 +1,181 @@ +from types import SimpleNamespace + +import gymnasium +import numpy as np + +import pufferlib +from pufferlib.ocean.impulse_wars import binding + + +discMoveToContMove = np.array([ + [1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0, 0.707107, 0.0], + [0.0, 0.707107, 1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0], +], dtype=np.float32) +discAimToContAim = np.array([ + [1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0, 0.382683, 0.707107, 0.92388, 0.0], + [0.0, 0.382683, 0.707107, 0.92388, 1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0], +], dtype=np.float32) + + +class ImpulseWars(pufferlib.PufferEnv): + def __init__( + self, + num_envs: int = 1, + num_drones: int = 2, + num_agents: int = 1, + enable_teams: bool = False, + sitting_duck: bool = False, + continuous: bool = False, + is_training: bool = True, + human_control: bool = False, + reward_win: float = 2.0, + reward_self_kill: float = -1.0, + reward_enemy_death: float = 1.0, + reward_enemy_kill: float = 1.0, + reward_death: float = -0.25, + reward_energy_emptied: float = -0.75, + reward_weapon_pickup: float = 0.5, + reward_shield_break: float = 0.5, + reward_shot_hit_coef: float = 0.005, + reward_explosion_hit_coef: float = 0.005, + seed: int = 0, + render: bool = False, + report_interval: int = 64, + buf = None, + ): + self.obsInfo = SimpleNamespace(**binding.get_consts(num_drones)) + + if num_envs <= 0: + raise ValueError("num_envs must be greater than 0") + if num_drones > 
self.obsInfo.maxDrones or num_drones <= 0:
+            raise ValueError(f"num_drones must be greater than 0 and less than or equal to {self.obsInfo.maxDrones}")
+        if num_agents > num_drones or num_agents <= 0:
+            raise ValueError("num_agents must be greater than 0 and less than or equal to num_drones")
+        if enable_teams and (num_drones % 2 != 0 or num_drones <= 2):
+            raise ValueError("enable_teams is only supported for even numbers of drones greater than 2")
+
+        self.numDrones = num_drones
+        self.continuous = continuous
+
+        self.num_agents = num_agents * num_envs
+        self.tick = 0
+
+        # map observations are bit packed to save space, and scalar
+        # observations need to be floats
+        self.single_observation_space = gymnasium.spaces.Box(
+            low=0, high=255, shape=(self.obsInfo.obsBytes,), dtype=np.uint8
+        )
+
+        if self.continuous:
+            # action space is actually bounded by (-1, 1) but pufferlib
+            # will check that actions are within the bounds of the action
+            # space before actions get to the env, and we ensure the actions
+            # are bounded there; so set bounds to (-inf, inf) here so
+            # action bounds checks pass
+            self.single_action_space = gymnasium.spaces.Box(
+                low=float("-inf"), high=float("inf"), shape=(self.obsInfo.contActionsSize,), dtype=np.float32
+            )
+        else:
+            self.single_action_space = gymnasium.spaces.MultiDiscrete(
+                [
+                    9, # move, noop + 8 directions
+                    17, # aim, noop + 16 directions
+                    2, # shoot or not
+                    2, # brake or not
+                    2, # burst
+                ]
+            )
+
+        self.report_interval = report_interval
+        self.render_mode = "human" if render else None
+
+        super().__init__(buf)
+        if not self.continuous:
+            self.actions = np.zeros((self.num_agents, self.obsInfo.contActionsSize), dtype=np.float32)
+
+        self.c_envs = binding.vec_init(
+            self.observations,
+            self.actions,
+            self.rewards,
+            self.terminals,
+            self.truncations,
+            num_envs,
+            seed,
+            num_drones=num_drones,
+            num_agents=num_agents,
+            map_idx=-1,
+            enable_teams=enable_teams,
+            sitting_duck=sitting_duck,
+            is_training=is_training,
+            continuous=continuous,
+            reward_win=reward_win,
+            reward_self_kill=reward_self_kill,
+            reward_enemy_death=reward_enemy_death,
+            reward_enemy_kill=reward_enemy_kill,
+            reward_death=reward_death,
+            reward_energy_emptied=reward_energy_emptied,
+            reward_weapon_pickup=reward_weapon_pickup,
+            reward_shield_break=reward_shield_break,
+            reward_shot_hit_coef=reward_shot_hit_coef,
+            reward_explosion_hit_coef=reward_explosion_hit_coef,
+        )
+
+        binding.shared(self.c_envs)
+
+    def reset(self, seed=None):
+        self.tick = 0
+        if seed is None:
+            binding.vec_reset(self.c_envs, 0)
+        else:
+            binding.vec_reset(self.c_envs, seed)
+        return self.observations, []
+
+    def step(self, actions):
+        if self.continuous:
+            self.actions[:] = actions
+        else:
+            # map discrete move/aim bins onto the unit circle directions
+            # defined in the tables at the top of this file
+            contMove = discMoveToContMove[:, actions[:, 0]].T
+            contAim = discAimToContAim[:, actions[:, 1]].T
+            contRest = actions[:, 2:].astype(np.float32)
+            self.actions[:] = np.concatenate([contMove, contAim, contRest], axis=1)
+
+        self.tick += 1
+        binding.vec_step(self.c_envs)
+
+        infos = []
+        if self.tick % self.report_interval == 0:
+            infos.append(binding.vec_log(self.c_envs))
+
+        return self.observations, self.rewards, self.terminals, self.truncations, infos
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+
+def testPerf(timeout, actionCache, numEnvs):
+    # the random actions below are continuous floats, so the env must use
+    # the continuous action space
+    env = ImpulseWars(numEnvs, continuous=True)
+
+    import time
+
+    np.random.seed(int(time.time()))
+    actions = np.random.uniform(-1, 1, (actionCache, env.num_agents, 7))
+
+    tick = 0
+    start = time.time()
+    while time.time() - start < timeout:
+        action = actions[tick % actionCache]
+        env.step(action)
+        tick += 1
+
+    sps = numEnvs * (tick / (time.time() - start))
+    print(f"SPS: {sps:,}")
+    print(f"Steps: {numEnvs * tick}")
+
+    env.close()
+
+
+if __name__ == "__main__":
+    testPerf(timeout=5, actionCache=1024, numEnvs=1)
diff --git a/ocean/impulse_wars/pyproject.toml b/ocean/impulse_wars/pyproject.toml
new file mode 100644
index 0000000000..df67b2bd17
--- /dev/null
+++ b/ocean/impulse_wars/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["scikit-build-core>=0.10", "autopxd2>=2.5.0", "cython>=3.0.11"]
+build-backend = "scikit_build_core.build"
+
+[project]
+name = "binding"
+version = "1.0.0"
+requires-python = ">=3.11"
+
+[tool.scikit-build]
+minimum-version = "build-system.requires"
+cmake.build-type = "Release"
+build.verbose = true
+logging.level = "INFO"
+
+[tool.scikit-build.cmake.define]
+BUILD_PYTHON_MODULE = true
+CMAKE_C_COMPILER = "clang-20"
+
+[tool.ruff]
+line-length = 110
+
+[tool.ruff.lint]
+# skip "Module level import not at top of file"
+ignore = ["E402"]
diff --git a/ocean/impulse_wars/types.h b/ocean/impulse_wars/types.h
index 6df5014e95..9bad059336 100644
--- a/ocean/impulse_wars/types.h
+++ b/ocean/impulse_wars/types.h
@@ -6,7 +6,7 @@
 #include "raylib.h"
 #include "rlights.h"
 
-#include "include/cc_array.h"
+#include "cc_array.h"
 
 #include "settings.h"
 
diff --git a/vendor/cc_array.h b/vendor/cc_array.h
new file mode 100644
index 0000000000..311f99122b
--- /dev/null
+++ b/vendor/cc_array.h
@@ -0,0 +1,1410 @@
+/*
+ * Collections-C
+ * Copyright (C) 2013-2015 Srđan Panić
+ *
+ * This file is part of Collections-C.
+ *
+ * Collections-C is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Collections-C is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Collections-C. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CC_ARRAY_H
+#define CC_ARRAY_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "cc_common.h"
+
+/**
+ * A dynamic array that expands automatically as elements are
+ * added. The array supports amortized constant time insertion
+ * and removal of elements at the end of the array, as well as
+ * constant time access.
+ */
+typedef struct cc_array_s CC_Array;
+
+/**
+ * Array configuration structure. Used to initialize a new Array
+ * with specific values.
+ */
+typedef struct cc_array_conf_s {
+    /**
+     * The initial capacity of the array */
+    size_t capacity;
+
+    /**
+     * The rate at which the buffer expands (capacity * exp_factor). */
+    float exp_factor;
+
+    /**
+     * Memory allocators used to allocate the Array structure and the
+     * underlying data buffers. */
+    void *(*mem_alloc)(size_t size);
+    void *(*mem_calloc)(size_t blocks, size_t size);
+    void (*mem_free)(void *block);
+} CC_ArrayConf;
+
+/**
+ * Array iterator structure. Used to iterate over the elements of
+ * the array in an ascending order. The iterator also supports
+ * operations for safely adding and removing elements during
+ * iteration.
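+ *
+ * A minimal usage sketch (assuming an already populated CC_Array *ar):
+ *
+ * @code
+ * CC_ArrayIter iter;
+ * void *element;
+ * cc_array_iter_init(&iter, ar);
+ * while (cc_array_iter_next(&iter, &element) != CC_ITER_END) {
+ *     // inspect element; cc_array_iter_remove(&iter, NULL) may safely
+ *     // remove the element that was just returned
+ * }
+ * @endcode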
+ */ +typedef struct cc_array_iter_s { + /** + * The array associated with this iterator */ + CC_Array *ar; + + /** + * The current position of the iterator.*/ + size_t index; + + /** + * Set to true if the last returned element was removed. */ + bool last_removed; +} CC_ArrayIter; + +/** + * Array zip iterator structure. Used to iterate over the elements of two + * arrays in lockstep in an ascending order until one of the Arrays is + * exhausted. The iterator also supports operations for safely adding + * and removing elements during iteration. + */ +typedef struct array_zip_iter_s { + CC_Array *ar1; + CC_Array *ar2; + size_t index; + bool last_removed; +} CC_ArrayZipIter; + +enum cc_stat cc_array_new(CC_Array **out); +enum cc_stat cc_array_new_conf(CC_ArrayConf const *const conf, CC_Array **out); +void cc_array_conf_init(CC_ArrayConf *conf); +size_t cc_array_struct_size(); + +void cc_array_destroy(CC_Array *ar); +void cc_array_destroy_cb(CC_Array *ar, void (*cb)(void *)); + +enum cc_stat cc_array_add(CC_Array *ar, void *element); +enum cc_stat cc_array_add_at(CC_Array *ar, void *element, size_t index); +enum cc_stat cc_array_replace_at(CC_Array *ar, void *element, size_t index, void **out); +enum cc_stat cc_array_swap_at(CC_Array *ar, size_t index1, size_t index2); + +enum cc_stat cc_array_remove(CC_Array *ar, void *element, void **out); +enum cc_stat cc_array_remove_fast(CC_Array *ar, void *element, void **out); +enum cc_stat cc_array_remove_at(CC_Array *ar, size_t index, void **out); +enum cc_stat cc_array_remove_fast_at(CC_Array *ar, size_t index, void **out); +enum cc_stat cc_array_remove_last(CC_Array *ar, void **out); +void cc_array_remove_all(CC_Array *ar); +void cc_array_remove_all_free(CC_Array *ar); + +enum cc_stat cc_array_get_at(const CC_Array *ar, size_t index, void **out); +enum cc_stat cc_array_get_last(const CC_Array *ar, void **out); + +enum cc_stat cc_array_subarray(CC_Array *ar, size_t from, size_t to, CC_Array **out); +enum cc_stat cc_array_copy_shallow(CC_Array *ar, CC_Array **out); +enum cc_stat cc_array_copy_deep(CC_Array *ar, void *(*cp)(void *), CC_Array **out); + +void cc_array_reverse(CC_Array *ar); +enum cc_stat cc_array_trim_capacity(CC_Array *ar); + +size_t cc_array_contains(const CC_Array *ar, void *element); +size_t cc_array_contains_value(const CC_Array *ar, void *element, int (*cmp)(const void *, const void *)); +size_t cc_array_size(const CC_Array *ar); +size_t cc_array_capacity(const CC_Array *ar); + +enum cc_stat cc_array_index_of(const CC_Array *ar, void *element, size_t *index); +void cc_array_sort(CC_Array *ar, int (*cmp)(const void *, const void *)); + +void cc_array_map(CC_Array *ar, void (*fn)(void *)); +void cc_array_reduce(CC_Array *ar, void (*fn)(void *, void *, void *), void *result); + +enum cc_stat cc_array_filter_mut(CC_Array *ar, bool (*predicate)(const void *)); +enum cc_stat cc_array_filter(CC_Array *ar, bool (*predicate)(const void *), CC_Array **out); + +void cc_array_iter_init(CC_ArrayIter *iter, CC_Array *ar); +enum cc_stat cc_array_iter_next(CC_ArrayIter *iter, void **out); +enum cc_stat cc_array_iter_remove(CC_ArrayIter *iter, void **out); +enum cc_stat cc_array_iter_remove_fast(CC_ArrayIter *iter, void **out); +enum cc_stat cc_array_iter_add(CC_ArrayIter *iter, void *element); +enum cc_stat cc_array_iter_replace(CC_ArrayIter *iter, void *element, void **out); +size_t cc_array_iter_index(CC_ArrayIter *iter); + +void cc_array_zip_iter_init(CC_ArrayZipIter *iter, CC_Array *a1, CC_Array *a2); +enum cc_stat 
cc_array_zip_iter_next(CC_ArrayZipIter *iter, void **out1, void **out2); +enum cc_stat cc_array_zip_iter_add(CC_ArrayZipIter *iter, void *e1, void *e2); +enum cc_stat cc_array_zip_iter_remove(CC_ArrayZipIter *iter, void **out1, void **out2); +enum cc_stat cc_array_zip_iter_replace(CC_ArrayZipIter *iter, void *e1, void *e2, void **out1, void **out2); +size_t cc_array_zip_iter_index(CC_ArrayZipIter *iter); + +const void *const *cc_array_get_buffer(CC_Array *ar); + +#define CC_ARRAY_FOREACH(val, array, body) \ + { \ + CC_ArrayIter cc_array_iter_53d46d2a04458e7b; \ + cc_array_iter_init(&cc_array_iter_53d46d2a04458e7b, array); \ + void *val; \ + while (cc_array_iter_next(&cc_array_iter_53d46d2a04458e7b, &val) != CC_ITER_END) \ + body \ + } + +#define CC_ARRAY_FOREACH_ZIP(val1, val2, array1, array2, body) \ + { \ + CC_ArrayZipIter cc_array_zip_iter_ea08d3e52f25883b3; \ + cc_array_zip_iter_init(&cc_array_zip_iter_ea08d3e52f25883b3, array1, array2); \ + void *val1; \ + void *val2; \ + while (cc_array_zip_iter_next(&cc_array_zip_iter_ea08d3e52f25883b3, &val1, &val2) != CC_ITER_END) \ + body \ + } + +#define DEFAULT_CAPACITY 8 +#define DEFAULT_EXPANSION_FACTOR 2 + +struct cc_array_s { + size_t size; + size_t capacity; + float exp_factor; + void **buffer; + + void *(*mem_alloc)(size_t size); + void *(*mem_calloc)(size_t blocks, size_t size); + void (*mem_free)(void *block); +}; + +static enum cc_stat expand_array_capacity(CC_Array *ar); + +/** + * Creates a new empty array and returns a status code. + * + * @param[out] out pointer to where the newly created CC_Array is to be stored + * + * @return CC_OK if the creation was successful, or CC_ERR_ALLOC if the + * memory allocation for the new CC_Array structure failed. + */ +enum cc_stat cc_array_new(CC_Array **out) { + CC_ArrayConf c; + cc_array_conf_init(&c); + return cc_array_new_conf(&c, out); +} + +/** + * Creates a new empty CC_Array based on the specified CC_ArrayConf struct and + * returns a status code. + * + * The CC_Array is allocated using the allocators specified in the CC_ArrayConf + * struct. The allocation may fail if underlying allocator fails. It may also + * fail if the values of exp_factor and capacity in the CC_ArrayConf do not meet + * the following condition: exp_factor < (CC_MAX_ELEMENTS / capacity). + * + * @param[in] conf array configuration structure + * @param[out] out pointer to where the newly created CC_Array is to be stored + * + * @return CC_OK if the creation was successful, CC_ERR_INVALID_CAPACITY if + * the above mentioned condition is not met, or CC_ERR_ALLOC if the memory + * allocation for the new CC_Array structure failed. + */ +enum cc_stat cc_array_new_conf(CC_ArrayConf const *const conf, CC_Array **out) { + float ex; + + /* The expansion factor must be greater than one for the + * array to grow */ + if (conf->exp_factor <= 1) { + ex = DEFAULT_EXPANSION_FACTOR; + } else { + ex = conf->exp_factor; + } + + /* Needed to avoid an integer overflow on the first resize and + * to easily check for any future overflows. 
*/ + if (!conf->capacity || ex >= CC_MAX_ELEMENTS / conf->capacity) { + return CC_ERR_INVALID_CAPACITY; + } + + CC_Array *ar = (CC_Array *)conf->mem_calloc(1, sizeof(CC_Array)); + + if (!ar) { + return CC_ERR_ALLOC; + } + + void **buff = (void **)conf->mem_alloc(conf->capacity * sizeof(void *)); + + if (!buff) { + conf->mem_free(ar); + return CC_ERR_ALLOC; + } + + ar->buffer = buff; + ar->exp_factor = ex; + ar->capacity = conf->capacity; + ar->mem_alloc = conf->mem_alloc; + ar->mem_calloc = conf->mem_calloc; + ar->mem_free = conf->mem_free; + + *out = ar; + return CC_OK; +} + +/** + * Initializes the fields of the CC_ArrayConf struct to default values. + * + * @param[in, out] conf CC_ArrayConf structure that is being initialized + */ +void cc_array_conf_init(CC_ArrayConf *conf) { + conf->exp_factor = DEFAULT_EXPANSION_FACTOR; + conf->capacity = DEFAULT_CAPACITY; + conf->mem_alloc = malloc; + conf->mem_calloc = calloc; + conf->mem_free = free; +} + +/** + * Destroys the CC_Array structure, but leaves the data it used to hold intact. + * + * @param[in] ar the array that is to be destroyed + */ +void cc_array_destroy(CC_Array *ar) { + ar->mem_free(ar->buffer); + ar->mem_free(ar); +} + +/** + * Destroys the CC_Array structure along with all the data it holds. + * + * @note + * This function should not be called on a array that has some of its elements + * allocated on the stack. + * + * @param[in] ar the array that is being destroyed + */ +void cc_array_destroy_cb(CC_Array *ar, void (*cb)(void *)) { + size_t i; + for (i = 0; i < ar->size; i++) { + cb(ar->buffer[i]); + } + + cc_array_destroy(ar); +} + +/** + * Adds a new element to the CC_Array. The element is appended to the array making + * it the last element (the one with the highest index) of the CC_Array. + * + * @param[in] ar the array to which the element is being added + * @param[in] element the element that is being added + * + * @return CC_OK if the element was successfully added, CC_ERR_ALLOC if the + * memory allocation for the new element failed, or CC_ERR_MAX_CAPACITY if the + * array is already at maximum capacity. + */ +enum cc_stat cc_array_add(CC_Array *ar, void *element) { + if (ar->size >= ar->capacity) { + enum cc_stat status = expand_array_capacity(ar); + if (status != CC_OK) { + return status; + } + } + + ar->buffer[ar->size] = element; + ar->size++; + + return CC_OK; +} + +/** + * Adds a new element to the array at a specified position by shifting all + * subsequent elements by one. The specified index must be within the bounds + * of the array. This function may also fail if the memory allocation for + * the new element was unsuccessful. + * + * @param[in] ar the array to which the element is being added + * @param[in] element the element that is being added + * @param[in] index the position in the array at which the element is being + * added + * + * @return CC_OK if the element was successfully added, CC_ERR_OUT_OF_RANGE if + * the specified index was not in range, CC_ERR_ALLOC if the memory + * allocation for the new element failed, or CC_ERR_MAX_CAPACITY if the + * array is already at maximum capacity. 
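+ *
+ * A short sketch of the status-checking convention (illustrative, not from
+ * the original source):
+ * @code
+ * if (cc_array_add_at(ar, element, 2) != CC_OK) {
+ *     // index out of range, allocation failure, or max capacity reached
+ * }
+ * @endcode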
+ */ +enum cc_stat cc_array_add_at(CC_Array *ar, void *element, size_t index) { + if (index == ar->size) { + return cc_array_add(ar, element); + } + + if ((ar->size == 0 && index != 0) || index > (ar->size - 1)) { + return CC_ERR_OUT_OF_RANGE; + } + + if (ar->size >= ar->capacity) { + enum cc_stat status = expand_array_capacity(ar); + if (status != CC_OK) { + return status; + } + } + + size_t shift = (ar->size - index) * sizeof(void *); + + memmove(&(ar->buffer[index + 1]), + &(ar->buffer[index]), + shift); + + ar->buffer[index] = element; + ar->size++; + + return CC_OK; +} + +/** + * Replaces an array element at the specified index and optionally sets the out + * parameter to the value of the replaced element. The specified index must be + * within the bounds of the CC_Array. + * + * @param[in] ar array whose element is being replaced + * @param[in] element replacement element + * @param[in] index index at which the replacement element should be inserted + * @param[out] out pointer to where the replaced element is stored, or NULL if + * it is to be ignored + * + * @return CC_OK if the element was successfully replaced, or CC_ERR_OUT_OF_RANGE + * if the index was out of range. + */ +enum cc_stat cc_array_replace_at(CC_Array *ar, void *element, size_t index, void **out) { + if (index >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + if (out) { + *out = ar->buffer[index]; + } + + ar->buffer[index] = element; + + return CC_OK; +} + +enum cc_stat cc_array_swap_at(CC_Array *ar, size_t index1, size_t index2) { + void *tmp; + + if (index1 >= ar->size || index2 >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + tmp = ar->buffer[index1]; + + ar->buffer[index1] = ar->buffer[index2]; + ar->buffer[index2] = tmp; + return CC_OK; +} + +/** + * Removes the specified element from the CC_Array if such element exists and + * optionally sets the out parameter to the value of the removed element. + * + * @param[in] ar array from which the element is being removed + * @param[in] element element being removed + * @param[out] out pointer to where the removed value is stored, or NULL + * if it is to be ignored + * + * @return CC_OK if the element was successfully removed, or + * CC_ERR_VALUE_NOT_FOUND if the element was not found. + */ +enum cc_stat cc_array_remove(CC_Array *ar, void *element, void **out) { + size_t index; + enum cc_stat status = cc_array_index_of(ar, element, &index); + + if (status == CC_ERR_OUT_OF_RANGE) { + return CC_ERR_VALUE_NOT_FOUND; + } + + if (index != ar->size - 1) { + size_t block_size = (ar->size - 1 - index) * sizeof(void *); + + memmove(&(ar->buffer[index]), + &(ar->buffer[index + 1]), + block_size); + } + ar->size--; + + if (out) { + *out = element; + } + + return CC_OK; +} + +/** + * Removes a CC_Array element without preserving order and optionally sets the + * out parameter to the value of the removed element. The last element of the + * array is moved to the index of the element being removed, and the last + * element is removed. + * + * @param[in] ar the array whose last element is being removed + * @param[out] out pointer to where the removed value is stored, or NULL if it is + * to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the CC_Array is already empty. 
+ */ +enum cc_stat cc_array_remove_fast(CC_Array *ar, void *element, void **out) { + size_t index = 0; + const enum cc_stat status = cc_array_index_of(ar, element, &index); + if (status != CC_OK) { + return status; + } + + if (out) { + *out = ar->buffer[index]; + } + + ar->buffer[index] = ar->buffer[ar->size - 1]; + ar->size--; + + return CC_OK; +} + +/** + * Removes an CC_Array element from the specified index and optionally sets the + * out parameter to the value of the removed element. The index must be within + * the bounds of the array. + * + * @param[in] ar the array from which the element is being removed + * @param[in] index the index of the element being removed. + * @param[out] out pointer to where the removed value is stored, + * or NULL if it is to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the index was out of range. + */ +enum cc_stat cc_array_remove_at(CC_Array *ar, size_t index, void **out) { + if (index >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + if (out) { + *out = ar->buffer[index]; + } + + if (index != ar->size - 1) { + size_t block_size = (ar->size - 1 - index) * sizeof(void *); + + memmove(&(ar->buffer[index]), + &(ar->buffer[index + 1]), + block_size); + } + ar->size--; + + return CC_OK; +} + +/** + * Removes a CC_Array element from the specified index and optionally sets the + * out parameter to the value of the removed element without preserving ordering. + * The last element of the array is moved to the index of the element being removed, + * and the last element is removed. The index must be within the bounds of the array. + * + * @param[in] ar the array from which the element is being removed + * @param[in] index the index of the element being removed. + * @param[out] out pointer to where the removed value is stored, + * or NULL if it is to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the index was out of range. + */ +enum cc_stat cc_array_remove_fast_at(CC_Array *ar, size_t index, void **out) { + if (index >= ar->size) { + return CC_ERR_OUT_OF_RANGE; + } + + if (out) { + *out = ar->buffer[index]; + } + + ar->buffer[index] = ar->buffer[ar->size - 1]; + ar->size--; + + return CC_OK; +} + +/** + * Removes an CC_Array element from the end of the array and optionally sets the + * out parameter to the value of the removed element. + * + * @param[in] ar the array whose last element is being removed + * @param[out] out pointer to where the removed value is stored, or NULL if it is + * to be ignored + * + * @return CC_OK if the element was successfully removed, or CC_ERR_OUT_OF_RANGE + * if the CC_Array is already empty. + */ +enum cc_stat cc_array_remove_last(CC_Array *ar, void **out) { + return cc_array_remove_at(ar, ar->size - 1, out); +} + +/** + * Removes all elements from the specified array. This function does not shrink + * the array capacity. + * + * @param[in] ar array from which all elements are to be removed + */ +void cc_array_remove_all(CC_Array *ar) { + ar->size = 0; +} + +/** + * Removes and frees all elements from the specified array. This function does + * not shrink the array capacity. + * + * @param[in] ar array from which all elements are to be removed + */ +void cc_array_remove_all_free(CC_Array *ar) { + size_t i; + for (i = 0; i < ar->size; i++) { + free(ar->buffer[i]); + } + + cc_array_remove_all(ar); +} + +/** + * Gets an CC_Array element from the specified index and sets the out parameter to + * its value. 
The specified index must be within the bounds of the array.
+ *
+ * @param[in] ar the array from which the element is being retrieved
+ * @param[in] index the index of the array element
+ * @param[out] out pointer to where the element is stored
+ *
+ * @return CC_OK if the element was found, or CC_ERR_OUT_OF_RANGE if the index
+ * was out of range.
+ */
+enum cc_stat cc_array_get_at(const CC_Array *ar, size_t index, void **out) {
+    if (index >= ar->size) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    *out = ar->buffer[index];
+    return CC_OK;
+}
+
+/**
+ * Gets the last element of the array or the element at the highest index
+ * and sets the out parameter to its value.
+ *
+ * @param[in] ar the array whose last element is being returned
+ * @param[out] out pointer to where the element is stored
+ *
+ * @return CC_OK if the element was found, or CC_ERR_VALUE_NOT_FOUND if the
+ * CC_Array is empty.
+ */
+enum cc_stat cc_array_get_last(const CC_Array *ar, void **out) {
+    if (ar->size == 0) {
+        return CC_ERR_VALUE_NOT_FOUND;
+    }
+
+    return cc_array_get_at(ar, ar->size - 1, out);
+}
+
+/**
+ * Returns the underlying array buffer.
+ *
+ * @note Any direct modification of the buffer may invalidate the CC_Array.
+ *
+ * @param[in] ar array whose underlying buffer is being returned
+ *
+ * @return array's internal buffer.
+ */
+const void *const *cc_array_get_buffer(CC_Array *ar) {
+    return (const void *const *)ar->buffer;
+}
+
+/**
+ * Gets the index of the specified element. The returned index is the index
+ * of the first occurrence of the element starting from the beginning of the
+ * CC_Array.
+ *
+ * @param[in] ar array being searched
+ * @param[in] element the element whose index is being looked up
+ * @param[out] index pointer to where the index is stored
+ *
+ * @return CC_OK if the index was found, or CC_ERR_OUT_OF_RANGE if not.
+ */
+enum cc_stat cc_array_index_of(const CC_Array *ar, void *element, size_t *index) {
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        if (ar->buffer[i] == element) {
+            *index = i;
+            return CC_OK;
+        }
+    }
+    return CC_ERR_OUT_OF_RANGE;
+}
+
+/**
+ * Creates a subarray of the specified CC_Array, ranging from b
+ * index (inclusive) to e index (inclusive). The range indices
+ * must be within the bounds of the CC_Array, while the e index
+ * must be greater or equal to the b index.
+ *
+ * @note The new CC_Array is allocated using the original CC_Array's allocators
+ * and it also inherits the configuration of the original CC_Array.
+ *
+ * @param[in] ar array from which the subarray is being created
+ * @param[in] b the beginning index (inclusive) of the subarray that must be
+ *              within the bounds of the array and must not exceed the
+ *              end index
+ * @param[in] e the end index (inclusive) of the subarray that must be within
+ *              the bounds of the array and must be greater or equal to the
+ *              beginning index
+ * @param[out] out pointer to where the new sublist is stored
+ *
+ * @return CC_OK if the subarray was successfully created, CC_ERR_INVALID_RANGE
+ * if the specified index range is invalid, or CC_ERR_ALLOC if the memory allocation
+ * for the new subarray failed.
+ 
+ */ +enum cc_stat cc_array_subarray(CC_Array *ar, size_t b, size_t e, CC_Array **out) { + if (b > e || e >= ar->size) { + return CC_ERR_INVALID_RANGE; + } + + CC_Array *sub_ar = (CC_Array *)ar->mem_calloc(1, sizeof(CC_Array)); + + if (!sub_ar) { + return CC_ERR_ALLOC; + } + + /* Try to allocate the buffer */ + if (!(sub_ar->buffer = (void **)ar->mem_alloc(ar->capacity * sizeof(void *)))) { + ar->mem_free(sub_ar); + return CC_ERR_ALLOC; + } + + sub_ar->mem_alloc = ar->mem_alloc; + sub_ar->mem_calloc = ar->mem_calloc; + sub_ar->mem_free = ar->mem_free; + sub_ar->size = e - b + 1; + sub_ar->capacity = sub_ar->size; + + memcpy(sub_ar->buffer, + &(ar->buffer[b]), + sub_ar->size * sizeof(void *)); + + *out = sub_ar; + return CC_OK; +} + +/** + * Creates a shallow copy of the specified CC_Array. A shallow copy is a copy of + * the CC_Array structure, but not the elements it holds. + * + * @note The new CC_Array is allocated using the original CC_Array's allocators + * and it also inherits the configuration of the original array. + * + * @param[in] ar the array to be copied + * @param[out] out pointer to where the newly created copy is stored + * + * @return CC_OK if the copy was successfully created, or CC_ERR_ALLOC if the + * memory allocation for the copy failed. + */ +enum cc_stat cc_array_copy_shallow(CC_Array *ar, CC_Array **out) { + CC_Array *copy = (CC_Array *)ar->mem_alloc(sizeof(CC_Array)); + + if (!copy) { + return CC_ERR_ALLOC; + } + + if (!(copy->buffer = (void **)ar->mem_calloc(ar->capacity, sizeof(void *)))) { + ar->mem_free(copy); + return CC_ERR_ALLOC; + } + copy->exp_factor = ar->exp_factor; + copy->size = ar->size; + copy->capacity = ar->capacity; + copy->mem_alloc = ar->mem_alloc; + copy->mem_calloc = ar->mem_calloc; + copy->mem_free = ar->mem_free; + + memcpy(copy->buffer, + ar->buffer, + copy->size * sizeof(void *)); + + *out = copy; + return CC_OK; +} + +/** + * Creates a deep copy of the specified CC_Array. A deep copy is a copy of + * both the CC_Array structure and the data it holds. + * + * @note The new CC_Array is allocated using the original CC_Array's allocators + * and it also inherits the configuration of the original CC_Array. + * + * @param[in] ar array to be copied + * @param[in] cp the copy function that should return a pointer to the copy of + * the data + * @param[out] out pointer to where the newly created copy is stored + * + * @return CC_OK if the copy was successfully created, or CC_ERR_ALLOC if the + * memory allocation for the copy failed. + */ +enum cc_stat cc_array_copy_deep(CC_Array *ar, void *(*cp)(void *), CC_Array **out) { + CC_Array *copy = (CC_Array *)ar->mem_alloc(sizeof(CC_Array)); + + if (!copy) { + return CC_ERR_ALLOC; + } + + if (!(copy->buffer = (void **)ar->mem_calloc(ar->capacity, sizeof(void *)))) { + ar->mem_free(copy); + return CC_ERR_ALLOC; + } + + copy->exp_factor = ar->exp_factor; + copy->size = ar->size; + copy->capacity = ar->capacity; + copy->mem_alloc = ar->mem_alloc; + copy->mem_calloc = ar->mem_calloc; + copy->mem_free = ar->mem_free; + + size_t i; + for (i = 0; i < copy->size; i++) { + copy->buffer[i] = cp(ar->buffer[i]); + } + + *out = copy; + + return CC_OK; +} + +/** + * Filters the CC_Array by modifying it. It removes all elements that don't + * return true on pred(element). 
+ *
+ * @param[in] ar array that is to be filtered
+ * @param[in] pred predicate function which returns true if the element should
+ *                 be kept in the CC_Array
+ *
+ * @return CC_OK if the CC_Array was filtered successfully, or CC_ERR_OUT_OF_RANGE
+ * if the CC_Array is empty.
+ */
+enum cc_stat cc_array_filter_mut(CC_Array *ar, bool (*pred)(const void *)) {
+    if (ar->size == 0) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    size_t rm = 0;
+    size_t keep = 0;
+
+    /* Look for clusters of non-matching elements before moving
+     * in order to minimize the number of memmoves */
+    for (size_t i = ar->size - 1; i != ((size_t)-1); i--) {
+        if (!pred(ar->buffer[i])) {
+            rm++;
+            continue;
+        }
+        if (rm > 0) {
+            if (keep > 0) {
+                size_t block_size = keep * sizeof(void *);
+                memmove(&(ar->buffer[i + 1]),
+                        &(ar->buffer[i + 1 + rm]),
+                        block_size);
+            }
+            ar->size -= rm;
+            rm = 0;
+        }
+        keep++;
+    }
+    /* Remove any remaining elements */
+    if (rm > 0) {
+        size_t block_size = keep * sizeof(void *);
+        memmove(&(ar->buffer[0]),
+                &(ar->buffer[rm]),
+                block_size);
+
+        ar->size -= rm;
+    }
+    return CC_OK;
+}
+
+/**
+ * Filters the CC_Array by creating a new CC_Array that contains all of the
+ * elements from the original CC_Array that return true on pred(element),
+ * without modifying the original CC_Array.
+ *
+ * @param[in] ar array that is to be filtered
+ * @param[in] pred predicate function which returns true if the element should
+ *                 be kept in the filtered array
+ * @param[out] out pointer to where the new filtered CC_Array is to be stored
+ *
+ * @return CC_OK if the CC_Array was filtered successfully, CC_ERR_OUT_OF_RANGE
+ * if the CC_Array is empty, or CC_ERR_ALLOC if the memory allocation for the
+ * new CC_Array failed.
+ */
+enum cc_stat cc_array_filter(CC_Array *ar, bool (*pred)(const void *), CC_Array **out) {
+    if (ar->size == 0) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    CC_Array *filtered = (CC_Array *)ar->mem_alloc(sizeof(CC_Array));
+
+    if (!filtered) {
+        return CC_ERR_ALLOC;
+    }
+
+    if (!(filtered->buffer = (void **)ar->mem_calloc(ar->capacity, sizeof(void *)))) {
+        ar->mem_free(filtered);
+        return CC_ERR_ALLOC;
+    }
+
+    filtered->exp_factor = ar->exp_factor;
+    filtered->size = 0;
+    filtered->capacity = ar->capacity;
+    filtered->mem_alloc = ar->mem_alloc;
+    filtered->mem_calloc = ar->mem_calloc;
+    filtered->mem_free = ar->mem_free;
+
+    size_t f = 0;
+    for (size_t i = 0; i < ar->size; i++) {
+        if (pred(ar->buffer[i])) {
+            filtered->buffer[f++] = ar->buffer[i];
+            filtered->size++;
+        }
+    }
+    *out = filtered;
+
+    return CC_OK;
+}
+
+/**
+ * Reverses the order of elements in the specified array.
+ *
+ * @param[in] ar array that is being reversed
+ */
+void cc_array_reverse(CC_Array *ar) {
+    if (ar->size == 0) {
+        return;
+    }
+
+    size_t i;
+    size_t j;
+    for (i = 0, j = ar->size - 1; i < ar->size / 2; i++, j--) {
+        void *tmp = ar->buffer[i];
+        ar->buffer[i] = ar->buffer[j];
+        ar->buffer[j] = tmp;
+    }
+}
+
+/**
+ * Trims the array's capacity; that is, it shrinks the capacity to match
+ * the number of elements in the CC_Array. The capacity will never shrink
+ * below 1.
+ *
+ * @param[in] ar array whose capacity is being trimmed
+ *
+ * @return CC_OK if the capacity was trimmed successfully, or CC_ERR_ALLOC if
+ * the reallocation failed.
+ */
+enum cc_stat cc_array_trim_capacity(CC_Array *ar) {
+    if (ar->size == ar->capacity) {
+        return CC_OK;
+    }
+
+    /* The capacity is never trimmed below 1 */
+    size_t size = ar->size < 1 ? 1 : ar->size;
+
+    void **new_buff = (void **)ar->mem_calloc(size, sizeof(void *));
+
+    if (!new_buff) {
+        return CC_ERR_ALLOC;
+    }
+
+    memcpy(new_buff, ar->buffer, size * sizeof(void *));
+    ar->mem_free(ar->buffer);
+
+    ar->buffer = new_buff;
+    ar->capacity = size;
+
+    return CC_OK;
+}
+
+/**
+ * Returns the number of occurrences of the element within the specified CC_Array.
+ *
+ * @param[in] ar array that is being searched
+ * @param[in] element the element that is being searched for
+ *
+ * @return the number of occurrences of the element.
+ */
+size_t cc_array_contains(const CC_Array *ar, void *element) {
+    size_t o = 0;
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        if (ar->buffer[i] == element) {
+            o++;
+        }
+    }
+    return o;
+}
+
+/**
+ * Returns the number of occurrences of the value pointed to by e
+ * within the specified CC_Array.
+ *
+ * @param[in] ar array that is being searched
+ * @param[in] element the element that is being searched for
+ * @param[in] cmp comparator function which returns 0 if the values passed to it are equal
+ *
+ * @return the number of occurrences of the value.
+ */
+size_t cc_array_contains_value(const CC_Array *ar, void *element, int (*cmp)(const void *, const void *)) {
+    size_t o = 0;
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        if (cmp(element, ar->buffer[i]) == 0) {
+            o++;
+        }
+    }
+    return o;
+}
+
+/**
+ * Returns the size of the specified CC_Array. The size of the array is the
+ * number of elements contained within the CC_Array.
+ *
+ * @param[in] ar array whose size is being returned
+ *
+ * @return the number of elements within the CC_Array.
+ */
+size_t cc_array_size(const CC_Array *ar) {
+    return ar->size;
+}
+
+/**
+ * Returns the capacity of the specified CC_Array. The capacity of the CC_Array is
+ * the maximum number of elements a CC_Array can hold before it has to be resized.
+ *
+ * @param[in] ar array whose capacity is being returned
+ *
+ * @return the capacity of the CC_Array.
+ */
+size_t cc_array_capacity(const CC_Array *ar) {
+    return ar->capacity;
+}
+
+/**
+ * Sorts the specified array.
+ *
+ * @note
+ * Pointers passed to the comparator function will be pointers to the array
+ * elements, which are themselves of type (void*), i.e. (void**). So an extra
+ * step of dereferencing is required before the data can be used for
+ * comparison: e.g. my_type e = *(*((my_type**) ptr));.
+ *
+ * @code
+ * int mycmp(const void *e1, const void *e2) {
+ *     MyType el1 = *(*((MyType **)e1));
+ *     MyType el2 = *(*((MyType **)e2));
+ *
+ *     if (el1 < el2) return -1;
+ *     if (el1 > el2) return 1;
+ *     return 0;
+ * }
+ *
+ * ...
+ *
+ * cc_array_sort(array, mycmp);
+ * @endcode
+ *
+ * @param[in] ar array to be sorted
+ * @param[in] cmp the comparator function that must be of type
+ *                int cmp(const void *e1, const void *e2), returning
+ *                < 0 if the first element goes before the second,
+ *                0 if the elements are equal, and > 0 if the second goes
+ *                before the first
+ */
+void cc_array_sort(CC_Array *ar, int (*cmp)(const void *, const void *)) {
+    qsort(ar->buffer, ar->size, sizeof(void *), cmp);
+}
+
+/**
+ * Expands the CC_Array capacity. This might fail if the new buffer
+ * cannot be allocated. In case the expansion would overflow the index
+ * range, a maximum capacity buffer is allocated instead. If the capacity
+ * is already at the maximum capacity, no new buffer is allocated.
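+ *
+ * (Illustrative note: the overflow check relies on the fact that once
+ * capacity * exp_factor no longer fits in a size_t, the truncated
+ * new_capacity can no longer exceed the current capacity, so
+ * new_capacity <= capacity signals overflow and the capacity is clamped
+ * to CC_MAX_ELEMENTS.)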
+ *
+ * @param[in] ar array whose capacity is being expanded
+ *
+ * @return CC_OK if the buffer was expanded successfully, CC_ERR_ALLOC if
+ * the memory allocation for the new buffer failed, or CC_ERR_MAX_CAPACITY
+ * if the array is already at maximum capacity.
+ */
+static enum cc_stat expand_array_capacity(CC_Array *ar) {
+    if (ar->capacity == CC_MAX_ELEMENTS) {
+        return CC_ERR_MAX_CAPACITY;
+    }
+
+    size_t new_capacity = (size_t)(ar->capacity * ar->exp_factor);
+
+    /* As long as the capacity is greater than the expansion factor
+     * at the point of overflow, this check is valid. */
+    if (new_capacity <= ar->capacity) {
+        ar->capacity = CC_MAX_ELEMENTS;
+    } else {
+        ar->capacity = new_capacity;
+    }
+
+    void **new_buff = (void **)ar->mem_alloc(ar->capacity * sizeof(void *));
+
+    if (!new_buff) {
+        return CC_ERR_ALLOC;
+    }
+
+    memcpy(new_buff, ar->buffer, ar->size * sizeof(void *));
+
+    ar->mem_free(ar->buffer);
+    ar->buffer = new_buff;
+
+    return CC_OK;
+}
+
+/**
+ * Applies the function fn to each element of the CC_Array.
+ *
+ * @param[in] ar array on which this operation is performed
+ * @param[in] fn operation function that is to be invoked on each CC_Array
+ *               element
+ */
+void cc_array_map(CC_Array *ar, void (*fn)(void *e)) {
+    size_t i;
+    for (i = 0; i < ar->size; i++) {
+        fn(ar->buffer[i]);
+    }
+}
+
+/**
+ * A fold/reduce function that collects all of the elements in the array
+ * together. For example, if we have an array of [a,b,c...], the end result
+ * will be (...((a+b)+c)+...).
+ *
+ * @param[in] ar the array on which this operation is performed
+ * @param[in] fn the operation function that is to be invoked on each array
+ *               element
+ * @param[in] result the pointer which will collect the end result
+ */
+void cc_array_reduce(CC_Array *ar, void (*fn)(void *, void *, void *), void *result) {
+    if (ar->size == 1) {
+        fn(ar->buffer[0], NULL, result);
+        return;
+    }
+    if (ar->size > 1) {
+        fn(ar->buffer[0], ar->buffer[1], result);
+    }
+
+    for (size_t i = 2; i < ar->size; i++) {
+        fn(result, ar->buffer[i], result);
+    }
+}
+
+/**
+ * Initializes the iterator.
+ *
+ * @param[in] iter the iterator that is being initialized
+ * @param[in] ar the array to iterate over
+ */
+void cc_array_iter_init(CC_ArrayIter *iter, CC_Array *ar) {
+    iter->ar = ar;
+    iter->index = 0;
+    iter->last_removed = false;
+}
+
+/**
+ * Advances the iterator and sets the out parameter to the value of the
+ * next element in the sequence.
+ *
+ * @param[in] iter the iterator that is being advanced
+ * @param[out] out pointer to where the next element is set
+ *
+ * @return CC_OK if the iterator was advanced, or CC_ITER_END if the
+ * end of the CC_Array has been reached.
+ */
+enum cc_stat cc_array_iter_next(CC_ArrayIter *iter, void **out) {
+    if (iter->index >= iter->ar->size) {
+        return CC_ITER_END;
+    }
+
+    *out = iter->ar->buffer[iter->index];
+
+    iter->index++;
+    iter->last_removed = false;
+
+    return CC_OK;
+}
+
+/**
+ * Removes the element last returned by cc_array_iter_next()
+ * without invalidating the iterator, and optionally sets the out
+ * parameter to the value of the removed element.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[out] out pointer to where the removed element is stored, or NULL
+ *                 if it is to be ignored
+ *
+ * @return CC_OK if the element was successfully removed, or
+ * CC_ERR_VALUE_NOT_FOUND.
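+ *
+ * For example, a minimal sketch (assuming ar is an existing CC_Array)
+ * that removes every NULL entry while iterating:
+ *
+ * @code
+ * CC_ArrayIter iter;
+ * cc_array_iter_init(&iter, ar);
+ *
+ * void *e;
+ * while (cc_array_iter_next(&iter, &e) != CC_ITER_END) {
+ *     if (e == NULL) {
+ *         cc_array_iter_remove(&iter, NULL);
+ *     }
+ * }
+ * @endcode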
+ */
+enum cc_stat cc_array_iter_remove(CC_ArrayIter *iter, void **out) {
+    enum cc_stat status = CC_ERR_VALUE_NOT_FOUND;
+
+    if (!iter->last_removed) {
+        status = cc_array_remove_at(iter->ar, iter->index - 1, out);
+        if (status != CC_OK) {
+            return status;
+        }
+
+        iter->last_removed = true;
+        if (iter->index > 0) {
+            iter->index--;
+        }
+    }
+    return status;
+}
+
+/**
+ * Removes the element last returned by cc_array_iter_next()
+ * without invalidating the iterator, and optionally sets the out
+ * parameter to the value of the removed element. The order of the array
+ * is not preserved: the last element of the array is moved to the index
+ * of the removed element and then dropped from the end.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[out] out pointer to where the removed element is stored, or NULL
+ *                 if it is to be ignored
+ *
+ * @return CC_OK if the element was successfully removed, or
+ * CC_ERR_VALUE_NOT_FOUND.
+ */
+enum cc_stat cc_array_iter_remove_fast(CC_ArrayIter *iter, void **out) {
+    enum cc_stat status = CC_ERR_VALUE_NOT_FOUND;
+
+    if (!iter->last_removed) {
+        status = cc_array_remove_fast_at(iter->ar, iter->index - 1, out);
+        if (status != CC_OK) {
+            return status;
+        }
+
+        iter->last_removed = true;
+        if (iter->index > 0) {
+            iter->index--;
+        }
+    }
+    return status;
+}
+
+/**
+ * Adds a new element to the CC_Array after the element last returned by
+ * cc_array_iter_next(), without invalidating the iterator.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[in] element the element being added
+ *
+ * @return CC_OK if the element was successfully added, CC_ERR_ALLOC if the
+ * memory allocation for the new element failed, or CC_ERR_MAX_CAPACITY if
+ * the array is already at maximum capacity.
+ */
+enum cc_stat cc_array_iter_add(CC_ArrayIter *iter, void *element) {
+    return cc_array_add_at(iter->ar, element, iter->index++);
+}
+
+/**
+ * Replaces the element last returned by cc_array_iter_next()
+ * with the specified element, and optionally sets the out parameter to
+ * the value of the replaced element.
+ *
+ * @note This function should only ever be called after a call to
+ * cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ * @param[in] element the replacement element
+ * @param[out] out pointer to where the replaced element is stored, or NULL
+ *                 if it is to be ignored
+ *
+ * @return CC_OK if the element was replaced successfully, or
+ * CC_ERR_OUT_OF_RANGE.
+ */
+enum cc_stat cc_array_iter_replace(CC_ArrayIter *iter, void *element, void **out) {
+    return cc_array_replace_at(iter->ar, element, iter->index - 1, out);
+}
+
+/**
+ * Returns the index of the element last returned by cc_array_iter_next().
+ *
+ * @note
+ * This function should not be called before a call to cc_array_iter_next().
+ *
+ * @param[in] iter the iterator on which this operation is being performed
+ *
+ * @return the index.
+ */
+size_t cc_array_iter_index(CC_ArrayIter *iter) {
+    return iter->index - 1;
+}
+
+/**
+ * Initializes the zip iterator.
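+ *
+ * For example, a minimal sketch (assuming ar1 and ar2 are existing
+ * CC_Arrays) that iterates over the element pairs of the two arrays:
+ *
+ * @code
+ * CC_ArrayZipIter zip;
+ * cc_array_zip_iter_init(&zip, ar1, ar2);
+ *
+ * void *e1, *e2;
+ * while (cc_array_zip_iter_next(&zip, &e1, &e2) != CC_ITER_END) {
+ *     // use e1 and e2 ...
+ * }
+ * @endcode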
+ *
+ * @param[in] iter iterator that is being initialized
+ * @param[in] ar1 first array
+ * @param[in] ar2 second array
+ */
+void cc_array_zip_iter_init(CC_ArrayZipIter *iter, CC_Array *ar1, CC_Array *ar2) {
+    iter->ar1 = ar1;
+    iter->ar2 = ar2;
+    iter->index = 0;
+    iter->last_removed = false;
+}
+
+/**
+ * Outputs the next element pair in the sequence and advances the iterator.
+ *
+ * @param[in] iter iterator that is being advanced
+ * @param[out] out1 output of the first array element
+ * @param[out] out2 output of the second array element
+ *
+ * @return CC_OK if a next element pair is returned, or CC_ITER_END if the end of one
+ * of the arrays has been reached.
+ */
+enum cc_stat cc_array_zip_iter_next(CC_ArrayZipIter *iter, void **out1, void **out2) {
+    if (iter->index >= iter->ar1->size || iter->index >= iter->ar2->size) {
+        return CC_ITER_END;
+    }
+
+    *out1 = iter->ar1->buffer[iter->index];
+    *out2 = iter->ar2->buffer[iter->index];
+
+    iter->index++;
+    iter->last_removed = false;
+
+    return CC_OK;
+}
+
+/**
+ * Removes and outputs the element pair last returned by cc_array_zip_iter_next(),
+ * without invalidating the iterator.
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ * @param[out] out1 output of the removed element from the first array
+ * @param[out] out2 output of the removed element from the second array
+ *
+ * @return CC_OK if the element pair was successfully removed, CC_ERR_OUT_OF_RANGE if the
+ * state of the iterator is invalid, or CC_ERR_VALUE_NOT_FOUND if the element pair was
+ * already removed.
+ */
+enum cc_stat cc_array_zip_iter_remove(CC_ArrayZipIter *iter, void **out1, void **out2) {
+    if ((iter->index - 1) >= iter->ar1->size || (iter->index - 1) >= iter->ar2->size) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    if (!iter->last_removed) {
+        cc_array_remove_at(iter->ar1, iter->index - 1, out1);
+        cc_array_remove_at(iter->ar2, iter->index - 1, out2);
+        iter->last_removed = true;
+        return CC_OK;
+    }
+    return CC_ERR_VALUE_NOT_FOUND;
+}
+
+/**
+ * Adds a new element pair to the arrays after the element pair last returned by
+ * cc_array_zip_iter_next(), and immediately before the element pair that would
+ * be returned by a subsequent call to cc_array_zip_iter_next(), without
+ * invalidating the iterator.
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ * @param[in] e1 element added to the first array
+ * @param[in] e2 element added to the second array
+ *
+ * @return CC_OK if the element pair was successfully added to the arrays, or
+ * CC_ERR_ALLOC if the memory allocation for the new elements failed.
+ */
+enum cc_stat cc_array_zip_iter_add(CC_ArrayZipIter *iter, void *e1, void *e2) {
+    size_t index = iter->index++;
+    CC_Array *ar1 = iter->ar1;
+    CC_Array *ar2 = iter->ar2;
+
+    /* Make sure both array buffers have room */
+    if ((ar1->size == ar1->capacity && (expand_array_capacity(ar1) != CC_OK)) ||
+        (ar2->size == ar2->capacity && (expand_array_capacity(ar2) != CC_OK))) {
+        return CC_ERR_ALLOC;
+    }
+
+    cc_array_add_at(ar1, e1, index);
+    cc_array_add_at(ar2, e2, index);
+
+    return CC_OK;
+}
+
+/**
+ * Replaces the element pair last returned by cc_array_zip_iter_next()
+ * with the specified replacement element pair.
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ * @param[in] e1 first array's replacement element
+ * @param[in] e2 second array's replacement element
+ * @param[out] out1 output of the replaced element from the first array
+ * @param[out] out2 output of the replaced element from the second array
+ *
+ * @return CC_OK if the element was successfully replaced, or CC_ERR_OUT_OF_RANGE.
+ */
+enum cc_stat cc_array_zip_iter_replace(CC_ArrayZipIter *iter, void *e1, void *e2, void **out1, void **out2) {
+    if ((iter->index - 1) >= iter->ar1->size || (iter->index - 1) >= iter->ar2->size) {
+        return CC_ERR_OUT_OF_RANGE;
+    }
+
+    cc_array_replace_at(iter->ar1, e1, iter->index - 1, out1);
+    cc_array_replace_at(iter->ar2, e2, iter->index - 1, out2);
+
+    return CC_OK;
+}
+
+/**
+ * Returns the index of the element pair last returned by cc_array_zip_iter_next().
+ *
+ * @param[in] iter iterator on which this operation is being performed
+ *
+ * @return current iterator index.
+ */
+size_t cc_array_zip_iter_index(CC_ArrayZipIter *iter) {
+    return iter->index - 1;
+}
+
+size_t cc_array_struct_size() {
+    return sizeof(CC_Array);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendor/cc_common.h b/vendor/cc_common.h
new file mode 100644
index 0000000000..1740460646
--- /dev/null
+++ b/vendor/cc_common.h
@@ -0,0 +1,75 @@
+/*
+ * Collections-C
+ * Copyright (C) 2013-2014 Srđan Panić
+ *
+ * This file is part of Collections-C.
+ *
+ * Collections-C is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Collections-C is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Collections-C. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CC_COMMON_H
+#define CC_COMMON_H
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef ARCH_64
+#define MAX_POW_TWO (((size_t)1) << 63)
+#else
+#define MAX_POW_TWO (((size_t)1) << 31)
+#endif /* ARCH_64 */
+
+enum cc_stat {
+    CC_OK = 0,
+
+    CC_ERR_ALLOC = 1,
+    CC_ERR_INVALID_CAPACITY = 2,
+    CC_ERR_INVALID_RANGE = 3,
+    CC_ERR_MAX_CAPACITY = 4,
+    CC_ERR_KEY_NOT_FOUND = 6,
+    CC_ERR_VALUE_NOT_FOUND = 7,
+    CC_ERR_OUT_OF_RANGE = 8,
+
+    CC_ITER_END = 9,
+};
+
+#define CC_MAX_ELEMENTS ((size_t)-2)
+
+#if defined(_MSC_VER)
+
+#define INLINE __inline
+#define FORCE_INLINE __forceinline
+
+#else
+
+#define INLINE inline
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+#endif /* _MSC_VER */
+
+int cc_common_cmp_str(const void *key1, const void *key2);
+
+#define CC_CMP_STRING cc_common_cmp_str
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendor/dlmalloc.h b/vendor/dlmalloc.h
new file mode 100644
index 0000000000..4ef7c9cfd5
--- /dev/null
+++ b/vendor/dlmalloc.h
@@ -0,0 +1,6264 @@
+/*
+  Default header file for malloc-2.7.2, written by Doug Lea
+  and released to the public domain. Use, modify, and redistribute
+  this code without permission or acknowledgement in any way you wish.
+  Send questions, comments, complaints, performance data, etc to
+  dl@cs.oswego.edu.
+
+  last update: Sun Feb 25 18:38:11 2001  Doug Lea  (dl at gee)
+
+  This header is for ANSI C/C++ only. You can set either of
+  the following #defines before including:
+
+  * If USE_DL_PREFIX is defined, it is assumed that malloc.c
+    was also compiled with this option, so all routines
+    have names starting with "dl".
+
+  * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this
+    file will be #included AFTER <malloc.h>. This is needed only if
+    your system defines a struct mallinfo that is incompatible with the
+    standard one declared here. Otherwise, you can include this file
+    INSTEAD of your system <malloc.h>. At least on ANSI, all declarations
+    should be compatible with system versions.
+*/
+
+#ifndef MALLOC_270_H
+#define MALLOC_270_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> /* for size_t */
+
+#define USE_DL_PREFIX
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
+  null if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. The minimum size
+  is 16 bytes on most 32bit systems, and either 24 or 32 bytes on
+  64bit systems, depending on internal size and alignment restrictions.
+
+  On most systems, size_t is an unsigned type. Calls with values of n
+  that appear "negative" when signed are interpreted as requests for
+  huge amounts of space, which will most often fail.
+
+  The maximum allowed value of n differs across systems, but is in all
+  cases less (typically by 8K) than the maximum representable value of
+  a size_t. Requests greater than this value result in failure.
+*/
+
+#ifndef USE_DL_PREFIX
+void *malloc(size_t);
+#else
+void *dlmalloc(size_t);
+#endif
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (and bad!)
+  effects if p has already been freed or was not obtained via malloc.
+
+  Unless disabled using mallopt, freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+#ifndef USE_DL_PREFIX
+void free(void *);
+#else
+void dlfree(void *);
+#endif
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+#ifndef USE_DL_PREFIX
+void *calloc(size_t, size_t);
+#else
+void *dlcalloc(size_t, size_t);
+#endif
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI), and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible. Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
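+
+  For example, a minimal caller-side sketch (illustrative, not part of
+  this API; handle_oom is a hypothetical error handler): because a
+  failed realloc leaves p allocated, assign through a temporary so the
+  original block is not leaked:
+
+    void* tmp = realloc(p, n);
+    if (tmp == 0)
+      handle_oom();   // p is still valid and still owned by the caller
+    else
+      p = tmp;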
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+
+#ifndef USE_DL_PREFIX
+void *realloc(void *, size_t);
+#else
+void *dlrealloc(void *, size_t);
+#endif
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+
+#ifndef USE_DL_PREFIX
+void *memalign(size_t, size_t);
+#else
+void *dlmemalign(size_t, size_t);
+#endif
+
+/*
+  valloc(size_t n);
+  Allocates a page-aligned chunk of at least n bytes.
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+
+#ifndef USE_DL_PREFIX
+void *valloc(size_t);
+#else
+void *dlvalloc(size_t);
+#endif
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements, each of which can hold contents of size
+  elem_size. Each element starts out cleared, and can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed. If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use regular calloc and assign pointers into this
+  space to represent elements. (In this case though, you cannot
+  independently free elements.)
+
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools. It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+    struct Node { int item; struct Node* next; };
+
+    struct Node* build_list() {
+      struct Node** pool;
+      int i;
+      int n = read_number_of_nodes_needed();
+      if (n <= 0) return 0;
+      pool = (struct Node**) independent_calloc(n, sizeof(struct Node), 0);
+      if (pool == 0) return 0; // failure
+      // organize into a linked list...
+      struct Node* first = pool[0];
+      for (i = 0; i < n-1; ++i)
+        pool[i]->next = pool[i+1];
+      free(pool); // Can now free the array (or not, if it is needed later)
+      return first;
+    }
+*/
+
+#ifndef USE_DL_PREFIX
+void **independent_calloc(size_t, size_t, void **);
+#else
+void **dlindependent_calloc(size_t, size_t, void **);
+#endif
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array. It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed. If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use a single regular malloc, and assign pointers at
+  particular offsets in the aggregate space. (In this case though, you
+  cannot independently free elements.)
+
+  independent_comalloc differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time. For example:
+
+    struct Head { ... };
+    struct Foot { ... };
+
+    void send_message(char* msg) {
+      int msglen = strlen(msg);
+      size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+      void* chunks[3];
+      if (independent_comalloc(3, sizes, chunks) == 0)
+        die();
+      struct Head* head = (struct Head*)(chunks[0]);
+      char* body = (char*)(chunks[1]);
+      struct Foot* foot = (struct Foot*)(chunks[2]);
+      // ...
+    }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from a series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+
+#ifndef USE_DL_PREFIX
+void **independent_comalloc(size_t, size_t *, void **);
+#else
+void **dlindependent_comalloc(size_t, size_t *, void **);
+#endif
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  rounds up n to the nearest pagesize.
+ */
+
+#ifndef USE_DL_PREFIX
+void *pvalloc(size_t);
+#else
+void *dlpvalloc(size_t);
+#endif
+
+/*
+  cfree(void* p);
+  Equivalent to free(p).
+
+  cfree is needed/defined on some systems that pair it with calloc,
+  for odd historical reasons (such as: cfree is used in example
+  code in the first edition of K&R).
+*/
+
+#ifndef USE_DL_PREFIX
+void cfree(void *);
+#else
+void dlcfree(void *);
+#endif
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+
+#ifndef USE_DL_PREFIX
+int malloc_trim(size_t);
+#else
+int dlmalloc_trim(size_t);
+#endif
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in an allocated
+  chunk, which may be more than you requested (although often not) due
+  to alignment and minimum size constraints. You can use this many
+  bytes without worrying about overwriting other allocated
+  objects. This is not a particularly great programming practice. But
+  malloc_usable_size can be more useful in debugging and assertions,
+  for example:
+
+    p = malloc(n);
+    assert(malloc_usable_size(p) >= 256);
+*/
+
+#ifndef USE_DL_PREFIX
+size_t malloc_usable_size(void *);
+#else
+size_t dlmalloc_usable_size(void *);
+#endif
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+*/
+
+#ifndef USE_DL_PREFIX
+void malloc_stats(void);
+#else
+void dlmalloc_stats(void);
+#endif
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+               have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+               than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  The names of some of these fields don't bear much relation to their
+  contents because this struct was defined as standard in SVID/XPG so
+  reflects the malloc implementation that was then used
+  in SystemV Unix.
+
+  The original SVID version of this struct, defined on most systems
+  with mallinfo, declares all fields as ints. But some others define
+  them as unsigned long. If your system defines the fields using a type
+  of different width than listed here, you should #include your system
+  version before including this file. The struct declaration is
+  suppressed if _MALLOC_H is defined (which is done in most system
+  malloc.h files). You can also suppress it by defining
+  HAVE_USR_INCLUDE_MALLOC_H.
+
+  Because these fields are ints, but internal bookkeeping is done with
+  unsigned longs, the reported values may appear as negative, and may
+  wrap around zero and thus be inaccurate.
+*/
+
+#ifndef HAVE_USR_INCLUDE_MALLOC_H
+#ifndef _MALLOC_H
+struct mallinfo {
+    int arena;
+    int ordblks;
+    int smblks;
+    int hblks;
+    int hblkhd;
+    int usmblks;
+    int fsmblks;
+    int uordblks;
+    int fordblks;
+    int keepcost;
+};
+#endif
+#endif
+
+#ifndef USE_DL_PREFIX
+struct mallinfo mallinfo(void);
+#else
+struct mallinfo dlmallinfo(void);
+#endif
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair. mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0. SVID/XPG defines four standard param numbers for mallopt,
+  normally defined in malloc.h. Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details. Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD  -1        128*1024   any   (-1U disables trimming)
+  M_TOP_PAD         -2        0          any
+  M_MMAP_THRESHOLD  -3        128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX        -4        65536      any   (0 disables use of mmap)
+*/
+
+#ifndef USE_DL_PREFIX
+int mallopt(int, int);
+#else
+int dlmallopt(int, int);
+#endif
+
+/* Descriptions of tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default. The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway. Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens, etc.
+  Using fastbins for larger objects normally worsens fragmentation
+  without improving speed.
+
+  You can reduce M_MXFAST to 0 to disable all use of fastbins. This
+  causes the malloc algorithm to be a closer approximation of
+  fifo-best-fit in all cases, not just for larger requests, but will
+  generally cause it to be slower.
+*/
+
+#ifndef M_MXFAST
+#define M_MXFAST 1
+#endif
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values. As a rough guide, you
+  might set it to a value close to the average size of a process
+  (program) running on your system. Releasing this much memory
+  would allow such a process to run in memory. Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all. And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect. To disable trimming completely, you can set it to
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  compiled with TRIM_FASTBINS defined, automatic trimming never takes
+  place upon freeing a chunk with size less than or equal to
+  MXFAST. Trimming is instead delayed until subsequent freeing of
+  larger chunks. However, you can still force an attempted trim by
+  calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD -1
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+    a new malloc request, this much padding is added to the sbrk
+    request.
+
+  * When malloc_trim is called automatically from free(),
+    it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0. However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD -2
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+  1. Mmapped space can ALWAYS be individually released back
+     to the system, which helps keep the system level memory
+     demands of a long-lived program low.
+  2. Mapped memory can never become `locked' between
+     other chunks, as can happen with normally allocated chunks, which
+     means that even trimming via malloc_trim would not release them.
+  3. On some systems with "holes" in address spaces, mmap can obtain
+     memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+  1. The space cannot be reclaimed, consolidated, and then
+     used to service later requests, as happens with normal chunks.
+  2. It can lead to more wastage because of mmap page alignment
+     requirements.
+  3. It causes malloc performance to be more dependent on host
+     system memory management support routines.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems. The
+  default is an empirically derived value that works well in most
+  systems.
+*/
+
+#define M_MMAP_THRESHOLD -3
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests. If
+  mmap is not supported on a system, the default value is 0, and
+  attempts to set it to non-zero values in mallopt will fail.
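+
+  For example, a minimal tuning sketch (illustrative only; the parameter
+  symbols and value meanings are those listed in the mallopt table
+  above):
+
+    mallopt(M_MXFAST, 0);    // disable fastbins entirely
+    mallopt(M_MMAP_MAX, 0);  // never service requests via mmap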
+*/
+
+#define M_MMAP_MAX -4
+
+/* Unused SVID2/XPG mallopt options, listed for completeness */
+
+#ifndef M_NLBLKS
+#define M_NLBLKS 2 /* UNUSED in this malloc */
+#endif
+#ifndef M_GRAIN
+#define M_GRAIN 3 /* UNUSED in this malloc */
+#endif
+#ifndef M_KEEP
+#define M_KEEP 4 /* UNUSED in this malloc */
+#endif
+
+/*
+  Some malloc.h's declare alloca, even though it is not part of malloc.
+*/
+
+#ifndef _ALLOCA_H
+extern void *alloca(size_t);
+#endif
+
+/*
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
+  Doug Lea and released to the public domain. Use, modify, and
+  redistribute this code without permission or acknowledgement in any
+  way you wish. Send questions, comments, complaints, performance
+  data, etc to dl@cs.oswego.edu
+
+* VERSION 2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+
+  Note: There may be an updated version of this malloc obtainable at
+        ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+  Check before installing!
+
+* Quickstart
+
+  This library is all in one file to simplify the most common usage:
+  ftp it, compile it (-O), and link it into another program. All
+  of the compile-time options default to reasonable values for use on
+  most unix platforms. Compile -DWIN32 for reasonable defaults on windows.
+  You might later want to step through various compile-time and dynamic
+  tuning options.
+
+  For convenience, an include file for code using this malloc is at:
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h
+  You don't really need this .h file unless you call functions not
+  defined in your system include files. The .h file contains only the
+  excerpts from this file needed for using this malloc on ANSI C/C++
+  systems, so long as you haven't changed compile-time options about
+  naming and tuning parameters. If you do, then you can create your
+  own malloc.h that does include all settings by cutting at the point
+  indicated below.
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...) functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(Void_t* p);
+    realloc(Void_t* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+    pvalloc(size_t n);
+    cfree(Void_t* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(Void_t* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t representation:        4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated beyond those requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+                           8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit. Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined
+
+       When USE_MALLOC_LOCK is defined, wrappers are created to
+       surround every public call with either a pthread mutex or
+       a win32 spinlock (depending on WIN32). This is not
+       especially fast, and can be a major bottleneck.
+       It is designed only to provide minimal protection
+       in concurrent environments, and to provide a basis for
+       extensions.
+       If you are using malloc in a concurrent program, you would be far
+       better off obtaining ptmalloc, which is derived from a version of
+       this malloc, and is well-tuned for concurrent programs. (See
+       http://www.malloc.de) Note that even when USE_MALLOC_LOCK is
+       defined, you can guarantee full thread-safety only if no threads
+       acquire memory through direct calls to MORECORE or other
+       system-level allocators.
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
+       others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and
+    Linux. It is also reported to work on WIN32 platforms.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C. It is not
+    at all modular. (Sorry!) It uses a lot of macros. To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    __STD_C                    derived from C compiler defines
+    WIN32                      NOT defined
+    HAVE_MEMCPY                defined
+    USE_MEMCPY                 1 if HAVE_MEMCPY is defined
+    HAVE_MMAP                  defined as 1
+    MMAP_CLEARS                1
+    HAVE_MREMAP                0 unless linux defined
+    malloc_getpagesize         derived from system #includes, or 4096 if not
+    HAVE_USR_INCLUDE_MALLOC_H  NOT defined
+    LACKS_UNISTD_H             NOT defined unless WIN32
+    LACKS_SYS_PARAM_H          NOT defined unless WIN32
+    LACKS_SYS_MMAN_H           NOT defined unless WIN32
+    LACKS_FCNTL_H              NOT defined
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
+    PTR_UINT                   unsigned long
+    CHUNK_SIZE_T               unsigned long
+
+    Configuration and functionality options:
+
+    USE_DL_PREFIX              NOT defined
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    DL_DEBUG                   NOT defined
+    REALLOC_ZERO_BYTES_FREES   NOT defined
+    MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
+    TRIM_FASTBINS              0
+    FIRST_SORTED_BIN_SIZE      512
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64
+    DEFAULT_TRIM_THRESHOLD     256 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     256 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.
+*/
+
+/*
+  WIN32 sets up defaults for MS environment and compilers.
+  Otherwise defaults are for unix.
+*/
+
+/* #define WIN32 */
+
+#ifdef WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* Win32 doesn't supply or need the following headers */
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+
+/* Use the supplied emulation of sbrk */
+#define MORECORE sbrk
+#define MORECORE_CONTIGUOUS 1
+#define MORECORE_FAILURE ((void *)(-1))
+
+/* Use the supplied emulation of mmap and munmap */
+#define HAVE_MMAP 1
+#define MUNMAP_FAILURE (-1)
+#define MMAP_CLEARS 1
+
+/* These values don't really matter in windows mmap emulation */
+#define MAP_PRIVATE 1
+#define MAP_ANONYMOUS 2
+#define PROT_READ 1
+#define PROT_WRITE 2
+
+/* Emulation functions defined at the end of this file */
+
+/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */
+#ifdef USE_MALLOC_LOCK
+static int slwait(int *sl);
+static int slrelease(int *sl);
+#endif
+
+static long getpagesize(void);
+static long getregionsize(void);
+static void *sbrk(long size);
+static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg);
+static long munmap(void *ptr, long size);
+
+static void vminfo(unsigned long *free, unsigned long *reserved, unsigned long *committed);
+static int cpuinfo(int whole, unsigned long *kernel, unsigned long *user);
+
+#endif
+
+/*
+  __STD_C should be nonzero if using ANSI-standard C compiler, a C++
+  compiler, or a C compiler sufficiently close to ANSI to get away
+  with it.
+*/
+
+#ifndef __STD_C
+#if defined(__STDC__) || defined(__cplusplus)
+#define __STD_C 1
+#else
+#define __STD_C 0
+#endif
+#endif /*__STD_C*/
+
+/*
+  Void_t* is the pointer type that malloc should say it returns
+*/
+
+#ifndef Void_t
+#if (__STD_C || defined(WIN32))
+#define Void_t void
+#else
+#define Void_t char
+#endif
+#endif /*Void_t*/
+
+#if __STD_C
+#include <stddef.h> /* for size_t */
+#else
+#include <sys/types.h>
+#endif
+
+/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
+
+/* #define LACKS_UNISTD_H */
+
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
+
+/* #define LACKS_SYS_PARAM_H */
+
+#include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */
+#include <stdio.h> /* needed for malloc_stats */
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs. This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DDL_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory. The
+  checking is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with DL_DEBUG set will
+  attempt to check every non-mmapped allocated and free chunk in the
+  course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting DL_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting DL_DEBUG does NOT provide an automated mechanism for checking
+  that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#include <assert.h>
+
+/*
+ The unsigned integer type used for comparing any two chunk sizes.
+ This should be at least as wide as size_t, but should not be signed.
+*/
+
+#ifndef CHUNK_SIZE_T
+#define CHUNK_SIZE_T unsigned long
+#endif
+
+/*
+ The unsigned integer type used to hold addresses when they are
+ manipulated as integers. intptr_t would suffice, except that it is
+ not defined on all systems.
+*/
+#ifndef PTR_UINT
+#define PTR_UINT unsigned long
+#endif
+
+/*
+ INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+ of chunk sizes.
+
+ The default version is the same as size_t.
+
+ While not strictly necessary, it is best to define this as an
+ unsigned type, even if size_t is a signed type. This may avoid some
+ artificial size limitations on some systems.
+
+ On a 64-bit machine, you may be able to reduce malloc overhead by
+ defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+ expense of not being able to handle more than 2^32 of malloced
+ space. If this limitation is acceptable, you are encouraged to set
+ this unless you are on a platform requiring 16-byte alignments. In
+ this case the alignment requirements turn out to negate any
+ potential advantages of decreasing size_t word size.
+
+ Implementors: Beware of the possible combinations of:
+ - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+ and might be the same width as int or as long
+ - size_t might have different width and signedness than INTERNAL_SIZE_T
+ - int and long might be 32 or 64 bits, and might be the same width
+ To deal with this, most comparisons and difference computations
+ among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
+ aware of the fact that casting an unsigned int to a wider long does
+ not sign-extend. (This also makes checking for negative numbers
+ awkward.) Some of these casts result in harmless compiler warnings
+ on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ (sizeof(INTERNAL_SIZE_T))
+
+/*
+ MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+ It must be a power of two at least 2 * SIZE_SZ, even on machines
+ for which smaller alignments would suffice. It may be defined as
+ larger than this though. Note however that code and data structures
+ are optimized for the case of 8-byte alignment.
+*/
+
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT (2 * SIZE_SZ)
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
+
+/*
+ REALLOC_ZERO_BYTES_FREES should be set if a call to
+ realloc with zero bytes should be the same as a call to free.
+ Some people think it should. Otherwise, since this malloc
+ returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+/* #define REALLOC_ZERO_BYTES_FREES */
+
+/*
+ TRIM_FASTBINS controls whether free() of a very small chunk can
+ immediately lead to trimming. Setting to true (1) can reduce memory
+ footprint, but will almost always slow down programs that use a lot
+ of small chunks.
+
+ Define this only if you are willing to give up some speed to more
+ aggressively reduce system-level memory footprint when releasing
+ memory in programs that use many small chunks. You can get
+ essentially the same effect by setting MXFAST to 0, but this can
+ lead to even greater slowdowns in programs using many small chunks.
+ TRIM_FASTBINS is an in-between compile-time option, that disables + only those chunks bordering topmost memory from being placed in + fastbins. +*/ + +#ifndef TRIM_FASTBINS +#define TRIM_FASTBINS 0 +#endif + +/* + USE_DL_PREFIX will prefix all public routines with the string 'dl'. + This is necessary when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. +*/ + +/* #define USE_DL_PREFIX */ + +/* + USE_MALLOC_LOCK causes wrapper functions to surround each + callable routine with pthread mutex lock/unlock. + + USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined +*/ + +/* #define USE_MALLOC_LOCK */ + +/* + If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is + actually a wrapper function that first calls MALLOC_PREACTION, then + calls the internal routine, and follows it with + MALLOC_POSTACTION. This is needed for locking, but you can also use + this, without USE_MALLOC_LOCK, for purposes of interception, + instrumentation, etc. It is a sad fact that using wrappers often + noticeably degrades performance of malloc-intensive programs. +*/ + +#ifdef USE_MALLOC_LOCK +#define USE_PUBLIC_MALLOC_WRAPPERS +#else +/* #define USE_PUBLIC_MALLOC_WRAPPERS */ +#endif + +/* + Two-phase name translation. + All of the actual routines are given mangled names. + When wrappers are used, they become the public callable versions. + When DL_PREFIX is used, the callable names are prefixed. +*/ + +#ifndef USE_PUBLIC_MALLOC_WRAPPERS +#define cALLOc public_cALLOc +#define fREe public_fREe +#define cFREe public_cFREe +#define mALLOc public_mALLOc +#define mEMALIGn public_mEMALIGn +#define rEALLOc public_rEALLOc +#define vALLOc public_vALLOc +#define pVALLOc public_pVALLOc +#define mALLINFo public_mALLINFo +#define mALLOPt public_mALLOPt +#define mTRIm public_mTRIm +#define mSTATs public_mSTATs +#define mUSABLe public_mUSABLe +#define iCALLOc public_iCALLOc +#define iCOMALLOc public_iCOMALLOc +#endif + +#ifdef USE_DL_PREFIX +#define public_cALLOc dlcalloc +#define public_fREe dlfree +#define public_cFREe dlcfree +#define public_mALLOc dlmalloc +#define public_mEMALIGn dlmemalign +#define public_rEALLOc dlrealloc +#define public_vALLOc dlvalloc +#define public_pVALLOc dlpvalloc +#define public_mALLINFo dlmallinfo +#define public_mALLOPt dlmallopt +#define public_mTRIm dlmalloc_trim +#define public_mSTATs dlmalloc_stats +#define public_mUSABLe dlmalloc_usable_size +#define public_iCALLOc dlindependent_calloc +#define public_iCOMALLOc dlindependent_comalloc +#else /* USE_DL_PREFIX */ +#define public_cALLOc calloc +#define public_fREe free +#define public_cFREe cfree +#define public_mALLOc malloc +#define public_mEMALIGn memalign +#define public_rEALLOc realloc +#define public_vALLOc valloc +#define public_pVALLOc pvalloc +#define public_mALLINFo mallinfo +#define public_mALLOPt mallopt +#define public_mTRIm malloc_trim +#define public_mSTATs malloc_stats +#define public_mUSABLe malloc_usable_size +#define public_iCALLOc independent_calloc +#define public_iCOMALLOc independent_comalloc +#endif /* USE_DL_PREFIX */ + +/* + HAVE_MEMCPY should be defined if you are not otherwise using + ANSI STD C, but still have memcpy and memset in your C library + and want to use them in calloc and realloc. Otherwise simple + macro versions are defined below. + + USE_MEMCPY should be defined as 1 if you actually want to + have memset and memcpy called. People report that the macro + versions are faster than libc versions on some systems. 
+ + Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks + (of <= 36 bytes) are manually unrolled in realloc and calloc. +*/ + +#define HAVE_MEMCPY + +#ifndef USE_MEMCPY +#ifdef HAVE_MEMCPY +#define USE_MEMCPY 1 +#else +#define USE_MEMCPY 0 +#endif +#endif + +#if (__STD_C || defined(HAVE_MEMCPY)) + +#ifdef WIN32 +/* On Win32 memset and memcpy are already declared in windows.h */ +#else +#if __STD_C +void *memset(void *, int, size_t); +void *memcpy(void *, const void *, size_t); +#else +Void_t *memset(); +Void_t *memcpy(); +#endif +#endif +#endif + +/* + MALLOC_FAILURE_ACTION is the action to take before "return 0" when + malloc fails to be able to return memory, either because memory is + exhausted or because of illegal arguments. + + By default, sets errno if running on STD_C platform, else does nothing. +*/ + +#ifndef MALLOC_FAILURE_ACTION +#if __STD_C +#define MALLOC_FAILURE_ACTION \ + errno = ENOMEM; + +#else +#define MALLOC_FAILURE_ACTION +#endif +#endif + +/* + MORECORE-related declarations. By default, rely on sbrk +*/ + +#ifdef LACKS_UNISTD_H +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +#if __STD_C +extern Void_t *sbrk(ptrdiff_t); +#else +extern Void_t *sbrk(); +#endif +#endif +#endif + +/* + MORECORE is the name of the routine to call to obtain more memory + from the system. See below for general guidance on writing + alternative MORECORE functions, as well as a version for WIN32 and a + sample version for pre-OSX macos. +*/ + +// #define _GNU_SOURCE +// #include +extern void *sbrk(intptr_t __delta) __THROW; +#define MORECORE sbrk + +/* + MORECORE_FAILURE is the value returned upon failure of MORECORE + as well as mmap. Since it cannot be an otherwise valid memory address, + and must reflect values of standard sys calls, you probably ought not + try to redefine it. +*/ + +#ifndef MORECORE_FAILURE +#define MORECORE_FAILURE (-1) +#endif + +/* + If MORECORE_CONTIGUOUS is true, take advantage of fact that + consecutive calls to MORECORE with positive arguments always return + contiguous increasing addresses. This is true of unix sbrk. Even + if not defined, when regions happen to be contiguous, malloc will + permit allocations spanning regions obtained from different + calls. But defining this when applicable enables some stronger + consistency checks and space efficiencies. +*/ + +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif + +/* + Define MORECORE_CANNOT_TRIM if your version of MORECORE + cannot release space back to the system when given negative + arguments. This is generally necessary only if you are using + a hand-crafted MORECORE function that cannot handle negative arguments. +*/ + +/* #define MORECORE_CANNOT_TRIM */ + +/* + Define HAVE_MMAP as true to optionally make malloc() use mmap() to + allocate very large blocks. These will be returned to the + operating system immediately after a free(). Also, if mmap + is available, it is used as a backup strategy in cases where + MORECORE fails to provide space from system. + + This malloc is best tuned to work with mmap for large requests. + If you do not have mmap, operations involving very large chunks (1MB + or so) may be slower than you'd like. +*/ + +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif + +#if HAVE_MMAP +/* + Standard unix mmap using /dev/zero clears memory so calloc doesn't + need to. 
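+
+ (One illustrative consequence, not spelled out in the original text:
+ when MMAP_CLEARS is 1, a calloc request that ends up serviced by a
+ fresh mmap can skip its explicit zeroing pass, since the kernel
+ already returns zero-filled pages.)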
+*/
+
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif
+
+#else /* no mmap */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 0
+#endif
+#endif
+
+/*
+ MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+ sbrk fails, and mmap is used as a backup (which is done only if
+ HAVE_MMAP). The value must be a multiple of page size. This
+ backup strategy generally applies only when systems have "holes" in
+ address space, so sbrk cannot perform contiguous expansion, but
+ there is still space available on system. On systems for which
+ this is known to be useful (i.e. most linux kernels), this occurs
+ only when programs allocate huge amounts of memory. Between this,
+ and the fact that mmap regions tend to be limited, the size should
+ be large, to avoid too many mmap calls and thus avoid running out
+ of kernel resources.
+*/
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+ Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+ large blocks. This is currently only possible on Linux with
+ kernel versions newer than 1.3.77.
+*/
+
+#ifndef HAVE_MREMAP
+#if defined(linux) && defined(__USE_GNU)
+#define HAVE_MREMAP 1
+#else
+#define HAVE_MREMAP 0
+#endif
+
+#endif /* HAVE_MREMAP */
+
+/*
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. Note that this value is
+ cached during initialization into a field of malloc_state. So even
+ if malloc_getpagesize is a function, it is only called once.
+
+ The following mechanics for getpagesize were adapted from bsd/gnu
+ getpagesize.h. If none of the system-probes here apply, a value of
+ 4096 is used, which should be OK: If they don't apply, then using
+ the actual value probably doesn't impact performance.
+*/
+
+#ifndef malloc_getpagesize
+
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
+#ifndef _SC_PAGE_SIZE
+#define _SC_PAGE_SIZE _SC_PAGESIZE
+#endif
+#endif
+
+#ifdef _SC_PAGE_SIZE
+#define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#else
+#if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+extern size_t getpagesize();
+#define malloc_getpagesize getpagesize()
+#else
+#ifdef WIN32 /* use supplied emulation of getpagesize */
+#define malloc_getpagesize getpagesize()
+#else
+#ifndef LACKS_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifdef EXEC_PAGESIZE
+#define malloc_getpagesize EXEC_PAGESIZE
+#else
+#ifdef NBPG
+#ifndef CLSIZE
+#define malloc_getpagesize NBPG
+#else
+#define malloc_getpagesize (NBPG * CLSIZE)
+#endif
+#else
+#ifdef NBPC
+#define malloc_getpagesize NBPC
+#else
+#ifdef PAGESIZE
+#define malloc_getpagesize PAGESIZE
+#else /* just guess */
+#define malloc_getpagesize (4096)
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+
+/*
+ This version of malloc supports the standard SVID/XPG mallinfo
+ routine that returns a struct containing usage properties and
+ statistics. It should work on any SVID/XPG compliant system that has
+ a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+ install such a thing yourself, cut out the preliminary declarations
+ as described above and below and save them in a malloc.h file. But
+ there's no compelling reason to bother to do this.)
+
+ The main declaration needed is the mallinfo struct that is returned
+ (by-copy) by mallinfo(). The SVID/XPG mallinfo struct contains a
+ bunch of fields that are not even meaningful in this version of
+ malloc.
These fields are instead filled by mallinfo() with
+ other numbers that might be of interest.
+
+ HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+ /usr/include/malloc.h file that includes a declaration of struct
+ mallinfo. If so, it is included; else an SVID2/XPG2 compliant
+ version is declared below. These must be precisely the same for
+ mallinfo() to work. The original SVID version of this struct,
+ defined on most systems with mallinfo, declares all fields as
+ ints. But some others define as unsigned long. If your system
+ defines the fields using a type of different width than listed here,
+ you must #include your system version and #define
+ HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#else
+
+/* SVID2/XPG mallinfo structure */
+
+struct mallinfo {
+    int arena;    /* non-mmapped space allocated from system */
+    int ordblks;  /* number of free chunks */
+    int smblks;   /* number of fastbin blocks */
+    int hblks;    /* number of mmapped regions */
+    int hblkhd;   /* space in mmapped regions */
+    int usmblks;  /* maximum total allocated space */
+    int fsmblks;  /* space available in freed fastbin blocks */
+    int uordblks; /* total allocated space */
+    int fordblks; /* total free space */
+    int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+/*
+ SVID/XPG defines four standard parameter numbers for mallopt,
+ normally defined in malloc.h. Only one of these (M_MXFAST) is used
+ in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+ so setting them has no effect. But this malloc also supports other
+ options in mallopt described below.
+*/
+#endif
+
+/* ---------- description of public routines ------------ */
+
+/*
+ malloc(size_t n)
+ Returns a pointer to a newly allocated chunk of at least n bytes, or null
+ if no space is available. Additionally, on failure, errno is
+ set to ENOMEM on ANSI C systems.
+
+ If n is zero, malloc returns a minimum-sized chunk. (The minimum
+ size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+ systems.) On most systems, size_t is an unsigned type, so calls
+ with negative arguments are interpreted as requests for huge amounts
+ of space, which will often fail. The maximum supported value of n
+ differs across systems, but is in all cases less than the maximum
+ representable value of a size_t.
+*/
+#if __STD_C
+Void_t *public_mALLOc(size_t);
+#else
+Void_t *public_mALLOc();
+#endif
+
+/*
+ free(Void_t* p)
+ Releases the chunk of memory pointed to by p, which had been previously
+ allocated using malloc or a related routine such as realloc.
+ It has no effect if p is null. It can have arbitrary (i.e., bad!)
+ effects if p has already been freed.
+
+ Unless disabled (using mallopt), freeing very large spaces will,
+ when possible, automatically trigger operations that give
+ back unused memory to the system, thus reducing program footprint.
+*/
+#if __STD_C
+void public_fREe(Void_t *);
+#else
+void public_fREe();
+#endif
+
+/*
+ calloc(size_t n_elements, size_t element_size);
+ Returns a pointer to n_elements * element_size bytes, with all locations
+ set to zero.
+*/
+#if __STD_C
+Void_t *public_cALLOc(size_t, size_t);
+#else
+Void_t *public_cALLOc();
+#endif
+
+/*
+ realloc(Void_t* p, size_t n)
+ Returns a pointer to a chunk of size n that contains the same data
+ as does chunk p up to the minimum of (n, p's size) bytes, or null
+ if no space is available.
+
+ The returned pointer may or may not be the same as p. The algorithm
+ prefers extending p when possible, otherwise it employs the
+ equivalent of a malloc-copy-free sequence.
+
+ If p is null, realloc is equivalent to malloc.
+
+ If space is not available, realloc returns null, errno is set (if on
+ ANSI) and p is NOT freed.
+
+ If n is for fewer bytes than already held by p, the newly unused
+ space is lopped off and freed if possible. Unless the #define
+ REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+ zero (re)allocates a minimum-sized chunk.
+ + Large chunks that were internally obtained via mmap will always + be reallocated using malloc-copy-free sequences unless + the system supports MREMAP (currently only linux). + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +#if __STD_C +Void_t *public_rEALLOc(Void_t *, size_t); +#else +Void_t *public_rEALLOc(); +#endif + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +#if __STD_C +Void_t *public_mEMALIGn(size_t, size_t); +#else +Void_t *public_mEMALIGn(); +#endif + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +#if __STD_C +Void_t *public_vALLOc(size_t); +#else +Void_t *public_vALLOc(); +#endif + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. Only one of these (M_MXFAST) is used + in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, + so setting them has no effect. But this malloc also supports four + other options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_MXFAST 1 64 0-80 (0 disables fastbins) + M_TRIM_THRESHOLD -1 256*1024 any (-1U disables trimming) + M_TOP_PAD -2 0 any + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) + M_MMAP_MAX -4 65536 any (0 disables use of mmap) +*/ +#if __STD_C +int public_mALLOPt(int, int); +#else +int public_mALLOPt(); +#endif + +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: the number of fastbin blocks (i.e., small chunks that + have been freed but not use resused or consolidated) + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: total bytes held in fastbin blocks + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. 
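+
+ A usage sketch (illustrative only, not part of the original text):
+
+     struct mallinfo mi = mallinfo();
+     fprintf(stderr, "arena: %d in use: %d free: %d\n",
+             mi.arena, mi.uordblks, mi.fordblks);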
+*/
+#if __STD_C
+struct mallinfo public_mALLINFo(void);
+#else
+struct mallinfo public_mALLINFo();
+#endif
+
+/*
+ independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
+
+ independent_calloc is similar to calloc, but instead of returning a
+ single cleared space, it returns an array of pointers to n_elements
+ independent elements that can hold contents of size elem_size, each
+ of which starts out cleared, and can be independently freed,
+ realloc'ed etc. The elements are guaranteed to be adjacently
+ allocated (this is not guaranteed to occur with multiple callocs or
+ mallocs), which may also improve cache locality in some
+ applications.
+
+ The "chunks" argument is optional (i.e., may be null, which is
+ probably the most typical usage). If it is null, the returned array
+ is itself dynamically allocated and should also be freed when it is
+ no longer needed. Otherwise, the chunks array must be of at least
+ n_elements in length. It is filled in with the pointers to the
+ chunks.
+
+ In either case, independent_calloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and "chunks"
+ is null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be individually freed when it is no longer
+ needed. If you'd like to instead be able to free all at once, you
+ should instead use regular calloc and assign pointers into this
+ space to represent elements. (In this case though, you cannot
+ independently free elements.)
+
+ independent_calloc simplifies and speeds up implementations of many
+ kinds of pools. It may also be useful when constructing large data
+ structures that initially have a fixed number of fixed-sized nodes,
+ but the number is not known at compile time, and some of the nodes
+ may later need to be freed. For example:
+
+ struct Node { int item; struct Node* next; };
+
+ struct Node* build_list() {
+     struct Node** pool;
+     struct Node* first;
+     int n = read_number_of_nodes_needed();
+     int i;
+     if (n <= 0) return 0;
+     pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
+     if (pool == 0) die();
+     // organize into a linked list...
+     first = pool[0];
+     for (i = 0; i < n-1; ++i)
+         pool[i]->next = pool[i+1];
+     free(pool); // Can now free the array (or not, if it is needed later)
+     return first;
+ }
+*/
+#if __STD_C
+Void_t **public_iCALLOc(size_t, size_t, Void_t **);
+#else
+Void_t **public_iCALLOc();
+#endif
+
+/*
+ independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+
+ independent_comalloc allocates, all at once, a set of n_elements
+ chunks with sizes indicated in the "sizes" array. It returns
+ an array of pointers to these elements, each of which can be
+ independently freed, realloc'ed etc. The elements are guaranteed to
+ be adjacently allocated (this is not guaranteed to occur with
+ multiple callocs or mallocs), which may also improve cache locality
+ in some applications.
+
+ The "chunks" argument is optional (i.e., may be null). If it is null
+ the returned array is itself dynamically allocated and should also
+ be freed when it is no longer needed. Otherwise, the chunks array
+ must be of at least n_elements in length. It is filled in with the
+ pointers to the chunks.
+
+ In either case, independent_comalloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and chunks is
+ null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+ + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comallac differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +#if __STD_C +Void_t **public_iCOMALLOc(size_t, size_t *, Void_t **); +#else +Void_t **public_iCOMALLOc(); +#endif + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +#if __STD_C +Void_t *public_pVALLOc(size_t); +#else +Void_t *public_pVALLOc(); +#endif + +/* + cfree(Void_t* p); + Equivalent to free(p). + + cfree is needed/defined on some systems that pair it with calloc, + for odd historical reasons (such as: cfree is used in example + code in the first edition of K&R). +*/ +#if __STD_C +void public_cFREe(Void_t *); +#else +void public_cFREe(); +#endif + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative + arguments to sbrk) if there is unused memory at the `high' end of + the malloc pool. You can call this after freeing large blocks of + memory to potentially reduce the system-level memory requirements + of a program. However, it cannot guarantee to reduce memory. Under + some allocation patterns, some large free blocks of memory will be + locked between two used chunks, so they cannot be given back to + the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, + only the minimum amount of memory to maintain internal data + structures will be left (one page or less). Non-zero arguments + can be supplied to maintain enough trailing space to service + future expected allocations without having to re-obtain memory + from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. + On systems that do not support "negative sbrks", it will always + rreturn 0. +*/ +#if __STD_C +int public_mTRIm(size_t); +#else +int public_mTRIm(); +#endif + +/* + malloc_usable_size(Void_t* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. 
This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); + +*/ +#if __STD_C +size_t public_mUSABLe(Void_t *); +#else +size_t public_mUSABLe(); +#endif + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. + +*/ +#if __STD_C +void public_mSTATs(void); +#else +void public_mSTATs(void); +#endif + +/* mallopt tuning options */ + +/* + M_MXFAST is the maximum request size used for "fastbins", special bins + that hold returned chunks without consolidating their spaces. This + enables future requests for chunks of the same size to be handled + very quickly, but can increase fragmentation, and thus increase the + overall memory footprint of a program. + + This malloc manages fastbins very conservatively yet still + efficiently, so fragmentation is rarely a problem for values less + than or equal to the default. The maximum supported value of MXFAST + is 80. You wouldn't want it any higher than this anyway. Fastbins + are designed especially for use with many small structs, objects or + strings -- the default handles structs/objects/arrays with sizes up + to 16 4byte fields, or small strings representing words, tokens, + etc. Using fastbins for larger objects normally worsens + fragmentation without improving speed. + + M_MXFAST is set in REQUEST size units. It is internally used in + chunksize units, which adds padding and alignment. You can reduce + M_MXFAST to 0 to disable all use of fastbins. This causes the malloc + algorithm to be a closer approximation of fifo-best-fit in all cases, + not just for larger requests, but will generally cause it to be + slower. +*/ + +/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ +#ifndef M_MXFAST +#define M_MXFAST 1 +#endif + +#ifndef DEFAULT_MXFAST +#define DEFAULT_MXFAST 64 +#endif + +/* + M_TRIM_THRESHOLD is the maximum amount of unused top-most memory + to keep before releasing via malloc_trim in free(). + + Automatic trimming is mainly useful in long-lived programs. + Because trimming via sbrk can be slow on some systems, and can + sometimes be wasteful (in cases where programs immediately + afterward allocate more large chunks) the value should be high + enough so that your overall system performance would improve by + releasing this much memory. + + The trim threshold and the mmap control parameters (see below) + can be traded off with one another. Trimming and mmapping are + two different ways of releasing unused memory back to the + system. 
Between these two, it is often possible to keep + system-level demands of a long-lived program down to a bare + minimum. For example, in one test suite of sessions measuring + the XF86 X server on Linux, using a trim threshold of 128K and a + mmap threshold of 192K led to near-minimal long term resource + consumption. + + If you are using this malloc in a long-lived program, it should + pay to experiment with these values. As a rough guide, you + might set to a value close to the average size of a process + (program) running on your system. Releasing this much memory + would allow such a process to run in memory. Generally, it's + worth it to tune for trimming rather tham memory mapping when a + program undergoes phases where several large chunks are + allocated and released in ways that can reuse each other's + storage, perhaps mixed with phases where there are no such + chunks at all. And in well-behaved long-lived programs, + controlling release of large blocks via trimming versus mapping + is usually faster. + + However, in most programs, these parameters serve mainly as + protection against the system-level effects of carrying around + massive amounts of unneeded memory. Since frequent calls to + sbrk, mmap, and munmap otherwise degrade performance, the default + parameters are set to relatively high values that serve only as + safeguards. + + The trim value must be greater than page size to have any useful + effect. To disable trimming completely, you can set to + (unsigned long)(-1) + + Trim settings interact with fastbin (MXFAST) settings: Unless + TRIM_FASTBINS is defined, automatic trimming never takes place upon + freeing a chunk with size less than or equal to MXFAST. Trimming is + instead delayed until subsequent freeing of larger chunks. However, + you can still force an attempted trim by calling malloc_trim. + + Also, trimming is not generally possible in cases where + the main arena is obtained via mmap. + + Note that the trick some people use of mallocing a huge space and + then freeing it at program startup, in an attempt to reserve system + memory, doesn't have the intended effect under automatic trimming, + since that memory will immediately be returned to the system. +*/ + +#define M_TRIM_THRESHOLD -1 + +#ifndef DEFAULT_TRIM_THRESHOLD +#define DEFAULT_TRIM_THRESHOLD (256 * 1024) +#endif + +/* + M_TOP_PAD is the amount of extra `padding' space to allocate or + retain whenever sbrk is called. It is used in two ways internally: + + * When sbrk is called to extend the top of the arena to satisfy + a new malloc request, this much padding is added to the sbrk + request. + + * When malloc_trim is called automatically from free(), + it is used as the `pad' argument. + + In both cases, the actual amount of padding is rounded + so that the end of the arena is always a system page boundary. + + The main reason for using padding is to avoid calling sbrk so + often. Having even a small pad greatly reduces the likelihood + that nearly every malloc request during program start-up (or + after trimming) will invoke sbrk, which needlessly wastes + time. + + Automatic rounding-up to page-size units is normally sufficient + to avoid measurable overhead, so the default is 0. However, in + systems where sbrk is relatively slow, it can pay to increase + this value, at the expense of carrying around more memory than + the program needs. 
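+
+ For example (an illustrative call, not part of the original text), a
+ program that allocates in bursts right after startup might reserve a
+ megabyte of headroom per sbrk with:
+
+     mallopt(M_TOP_PAD, 1024 * 1024);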
+*/
+
+#define M_TOP_PAD -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD (0)
+#endif
+
+/*
+ M_MMAP_THRESHOLD is the request size threshold for using mmap()
+ to service a request. Requests of at least this size that cannot
+ be allocated using already-existing space will be serviced via mmap.
+ (If enough normal freed space already exists it is used instead.)
+
+ Using mmap segregates relatively large chunks of memory so that
+ they can be individually obtained and released from the host
+ system. A request serviced through mmap is never reused by any
+ other request (at least not directly; the system may just so
+ happen to remap successive requests to the same locations).
+
+ Segregating space in this way has the benefits that:
+
+ 1. Mmapped space can ALWAYS be individually released back
+ to the system, which helps keep the system level memory
+ demands of a long-lived program low.
+ 2. Mapped memory can never become `locked' between
+ other chunks, as can happen with normally allocated chunks, which
+ means that even trimming via malloc_trim would not release them.
+ 3. On some systems with "holes" in address spaces, mmap can obtain
+ memory that sbrk cannot.
+
+ However, it has the disadvantages that:
+
+ 1. The space cannot be reclaimed, consolidated, and then
+ used to service later requests, as happens with normal chunks.
+ 2. It can lead to more wastage because of mmap page alignment
+ requirements.
+ 3. It causes malloc performance to be more dependent on host
+ system memory management support routines which may vary in
+ implementation quality and may impose arbitrary
+ limitations. Generally, servicing a request via normal
+ malloc steps is faster than going through a system's mmap.
+
+ The advantages of mmap nearly always outweigh disadvantages for
+ "large" chunks, but the value of "large" varies across systems. The
+ default is an empirically derived value that works well in most
+ systems.
+*/
+
+#define M_MMAP_THRESHOLD -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
+#endif
+
+/*
+ M_MMAP_MAX is the maximum number of requests to simultaneously
+ service using mmap. This parameter exists because some systems
+ have a limited number of internal tables for use by mmap, and
+ using more than a few of them may degrade performance.
+
+ The default is set to a value that serves only as a safeguard.
+ Setting to 0 disables use of mmap for servicing large requests. If
+ HAVE_MMAP is not set, the default value is 0, and attempts to set it
+ to non-zero values in mallopt will fail.
+*/
+
+#define M_MMAP_MAX -4
+
+#ifndef DEFAULT_MMAP_MAX
+#if HAVE_MMAP
+#define DEFAULT_MMAP_MAX (65536)
+#else
+#define DEFAULT_MMAP_MAX (0)
+#endif
+#endif
+
+/*
+ ========================================================================
+ To make a fully customizable malloc.h header file, cut everything
+ above this line, put into file malloc.h, edit to suit, and #include it
+ on the next line, as well as in programs that use this malloc.
+ ======================================================================== +*/ + +/* #include "malloc.h" */ + +/* --------------------- public wrappers ---------------------- */ + +#ifdef USE_PUBLIC_MALLOC_WRAPPERS + +/* Declare all routines as internal */ +#if __STD_C +static Void_t *mALLOc(size_t); +static void fREe(Void_t *); +static Void_t *rEALLOc(Void_t *, size_t); +static Void_t *mEMALIGn(size_t, size_t); +static Void_t *vALLOc(size_t); +static Void_t *pVALLOc(size_t); +static Void_t *cALLOc(size_t, size_t); +static Void_t **iCALLOc(size_t, size_t, Void_t **); +static Void_t **iCOMALLOc(size_t, size_t *, Void_t **); +static void cFREe(Void_t *); +static int mTRIm(size_t); +static size_t mUSABLe(Void_t *); +static void mSTATs(); +static int mALLOPt(int, int); +static struct mallinfo mALLINFo(void); +#else +static Void_t *mALLOc(); +static void fREe(); +static Void_t *rEALLOc(); +static Void_t *mEMALIGn(); +static Void_t *vALLOc(); +static Void_t *pVALLOc(); +static Void_t *cALLOc(); +static Void_t **iCALLOc(); +static Void_t **iCOMALLOc(); +static void cFREe(); +static int mTRIm(); +static size_t mUSABLe(); +static void mSTATs(); +static int mALLOPt(); +static struct mallinfo mALLINFo(); +#endif + +/* + MALLOC_PREACTION and MALLOC_POSTACTION should be + defined to return 0 on success, and nonzero on failure. + The return value of MALLOC_POSTACTION is currently ignored + in wrapper functions since there is no reasonable default + action to take on failure. +*/ + +#ifdef USE_MALLOC_LOCK + +#ifdef WIN32 + +static int mALLOC_MUTEx; +#define MALLOC_PREACTION slwait(&mALLOC_MUTEx) +#define MALLOC_POSTACTION slrelease(&mALLOC_MUTEx) + +#else + +#include + +static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER; + +#define MALLOC_PREACTION pthread_mutex_lock(&mALLOC_MUTEx) +#define MALLOC_POSTACTION pthread_mutex_unlock(&mALLOC_MUTEx) + +#endif /* USE_MALLOC_LOCK */ + +#else + +/* Substitute anything you like for these */ + +#define MALLOC_PREACTION (0) +#define MALLOC_POSTACTION (0) + +#endif + +Void_t *public_mALLOc(size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = mALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +void public_fREe(Void_t *m) { + if (MALLOC_PREACTION != 0) { + return; + } + fREe(m); + if (MALLOC_POSTACTION != 0) { + } +} + +Void_t *public_rEALLOc(Void_t *m, size_t bytes) { + if (MALLOC_PREACTION != 0) { + return 0; + } + m = rEALLOc(m, bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_mEMALIGn(size_t alignment, size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = mEMALIGn(alignment, bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_vALLOc(size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = vALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_pVALLOc(size_t bytes) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = pVALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t *public_cALLOc(size_t n, size_t elem_size) { + Void_t *m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = cALLOc(n, elem_size); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t **public_iCALLOc(size_t n, size_t elem_size, Void_t **chunks) { + Void_t **m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = iCALLOc(n, elem_size, chunks); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t **public_iCOMALLOc(size_t n, size_t sizes[], Void_t 
**chunks) {
+    Void_t **m;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    m = iCOMALLOc(n, sizes, chunks);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return m;
+}
+
+void public_cFREe(Void_t *m) {
+    if (MALLOC_PREACTION != 0) {
+        return;
+    }
+    cFREe(m);
+    if (MALLOC_POSTACTION != 0) {
+    }
+}
+
+int public_mTRIm(size_t s) {
+    int result;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    result = mTRIm(s);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return result;
+}
+
+size_t public_mUSABLe(Void_t *m) {
+    size_t result;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    result = mUSABLe(m);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return result;
+}
+
+void public_mSTATs() {
+    if (MALLOC_PREACTION != 0) {
+        return;
+    }
+    mSTATs();
+    if (MALLOC_POSTACTION != 0) {
+    }
+}
+
+struct mallinfo public_mALLINFo() {
+    struct mallinfo m;
+    if (MALLOC_PREACTION != 0) {
+        struct mallinfo nm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+        return nm;
+    }
+    m = mALLINFo();
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return m;
+}
+
+int public_mALLOPt(int p, int v) {
+    int result;
+    if (MALLOC_PREACTION != 0) {
+        return 0;
+    }
+    result = mALLOPt(p, v);
+    if (MALLOC_POSTACTION != 0) {
+    }
+    return result;
+}
+
+#endif
+
+/* ------------- Optional versions of memcopy ---------------- */
+
+#if USE_MEMCPY
+
+/*
+ Note: memcpy is ONLY invoked with non-overlapping regions,
+ so the (usually slower) memmove is not needed.
+*/
+
+#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes)
+#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes)
+
+#else /* !USE_MEMCPY */
+
+/* Use Duff's device for good zeroing/copying performance. */
+
+#define MALLOC_ZERO(charp, nbytes)                               \
+    do {                                                         \
+        INTERNAL_SIZE_T *mzp = (INTERNAL_SIZE_T *)(charp);       \
+        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
+        long mcn;                                                \
+        if (mctmp < 8)                                           \
+            mcn = 0;                                             \
+        else {                                                   \
+            mcn = (mctmp - 1) / 8;                               \
+            mctmp %= 8;                                          \
+        }                                                        \
+        switch (mctmp) {                                         \
+        case 0:                                                  \
+            for (;;) {                                           \
+                *mzp++ = 0;                                      \
+            case 7:                                              \
+                *mzp++ = 0;                                      \
+            case 6:                                              \
+                *mzp++ = 0;                                      \
+            case 5:                                              \
+                *mzp++ = 0;                                      \
+            case 4:                                              \
+                *mzp++ = 0;                                      \
+            case 3:                                              \
+                *mzp++ = 0;                                      \
+            case 2:                                              \
+                *mzp++ = 0;                                      \
+            case 1:                                              \
+                *mzp++ = 0;                                      \
+                if (mcn <= 0)                                    \
+                    break;                                       \
+                mcn--;                                           \
+            }                                                    \
+        }                                                        \
+    } while (0)
+
+#define MALLOC_COPY(dest, src, nbytes)                           \
+    do {                                                         \
+        INTERNAL_SIZE_T *mcsrc = (INTERNAL_SIZE_T *)src;         \
+        INTERNAL_SIZE_T *mcdst = (INTERNAL_SIZE_T *)dest;        \
+        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
+        long mcn;                                                \
+        if (mctmp < 8)                                           \
+            mcn = 0;                                             \
+        else {                                                   \
+            mcn = (mctmp - 1) / 8;                               \
+            mctmp %= 8;                                          \
+        }                                                        \
+        switch (mctmp) {                                         \
+        case 0:                                                  \
+            for (;;) {                                           \
+                *mcdst++ = *mcsrc++;                             \
+            case 7:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 6:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 5:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 4:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 3:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 2:                                              \
+                *mcdst++ = *mcsrc++;                             \
+            case 1:                                              \
+                *mcdst++ = *mcsrc++;                             \
+                if (mcn <= 0)                                    \
+                    break;                                       \
+                mcn--;                                           \
+            }                                                    \
+        }                                                        \
+    } while (0)
+
+#endif
+
+/* ------------------ MMAP support ------------------ */
+
+#if HAVE_MMAP
+
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#ifndef LACKS_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/*
+ Nearly all versions of mmap support MAP_ANONYMOUS,
+ so the following is unlikely to be needed, but is
+ supplied just in case.
+*/
+
+#ifndef MAP_ANONYMOUS
+
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+
+#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \
(dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) \ + : mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) + +#else + +#define MMAP(addr, size, prot, flags) \ + (mmap((addr), (size), (prot), (flags) | MAP_ANONYMOUS, -1, 0)) + +#endif + +#endif /* HAVE_MMAP */ + +/* + ----------------------- Chunk representations ----------------------- +*/ + +/* + This struct declaration is misleading (but accurate and necessary). + It declares a "view" into memory allowing access to necessary + fields at known offsets from a given base. See explanation below. +*/ + +struct malloc_chunk { + + INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */ + INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */ + + struct malloc_chunk *fd; /* double links -- used only if free. */ + struct malloc_chunk *bk; +}; + +typedef struct malloc_chunk *mchunkptr; + +/* + malloc_chunk details: + + (The following includes lightly edited explanations by Colin Plumb.) + + Chunks of memory are maintained using a `boundary tag' method as + described in e.g., Knuth or Standish. (See the paper by Paul + Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a + survey of such techniques.) Sizes of free chunks are stored both + in the front of each chunk and at the end. This makes + consolidating fragmented chunks into bigger chunks very fast. The + size fields also hold bits representing whether chunks are free or + in use. + + An allocated chunk looks like this: + + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk, if allocated | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | User data starts here... . + . . + . (malloc_usable_space() bytes) . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + Where "chunk" is the front of the chunk for the purpose of most of + the malloc code, but "mem" is the pointer that is returned to the + user. "Nextchunk" is the beginning of the next contiguous chunk. + + Chunks always begin on even word boundries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + Free chunks are stored in circular doubly-linked lists, and look like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The P (PREV_INUSE) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. 
If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, + preventing access to non-existent (or non-owned) memory. If + prev_inuse is set for any given chunk, then you CANNOT determine + the size of the previous chunk, and might even get a memory + addressing fault when trying to do so. + + Note that the `foot' of the current chunk is actually represented + as the prev_size of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The two exceptions to all this are + + 1. The special chunk `top' doesn't bother using the + trailing size field since there is no next contiguous chunk + that would have to index off it. After initialization, `top' + is forced to always exist. If it would become less than + MINSIZE bytes long, it is replenished. + + 2. Chunks allocated via mmap, which have the second-lowest-order + bit (IS_MMAPPED) set in their size fields. Because they are + allocated one-by-one, each must contain its own trailing size field. + +*/ + +/* + ---------- Size and alignment checks and conversions ---------- +*/ + +/* conversion from malloc headers to user pointers, and back */ + +#define chunk2mem(p) ((Void_t *)((char *)(p) + 2 * SIZE_SZ)) +#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - 2 * SIZE_SZ)) + +/* The smallest possible chunk */ +#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) + +/* The smallest size we can malloc is an aligned minimal chunk */ + +#define MINSIZE \ + (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) + +/* Check if m has acceptable alignment */ + +#define aligned_OK(m) (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0) + +/* + Check if a request is so large that it would wrap around zero when + padded and aligned. To simplify some other code, the bound is made + low enough so that adding MINSIZE will also not wrap around sero. +*/ + +#define REQUEST_OUT_OF_RANGE(req) \ + ((CHUNK_SIZE_T)(req) >= \ + (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE)) + +/* pad request bytes into a usable size -- internal version */ + +#define request2size(req) \ + (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) + +/* Same, except also perform argument check */ + +#define checked_request2size(req, sz) \ + if (REQUEST_OUT_OF_RANGE(req)) { \ + MALLOC_FAILURE_ACTION; \ + return 0; \ + } \ + (sz) = request2size(req); + +/* + --------------- Physical chunk operations --------------- +*/ + +/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ +#define PREV_INUSE 0x1 + +/* extract inuse bit of previous chunk */ +#define prev_inuse(p) ((p)->size & PREV_INUSE) + +/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ +#define IS_MMAPPED 0x2 + +/* check for mmap()'ed chunk */ +#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) + +/* + Bits to mask off when extracting size + + Note: IS_MMAPPED is intentionally not masked off from size field in + macros for which mmapped chunks should never be seen. This should + cause helpful core dumps to occur if it is tried by accident by + people extending or adapting this malloc. +*/ +#define SIZE_BITS (PREV_INUSE | IS_MMAPPED) + +/* Get size, ignoring use bits */ +#define chunksize(p) ((p)->size & ~(SIZE_BITS)) + +/* Ptr to next physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE))) + +/* Ptr to previous physical malloc_chunk */ +#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_size))) + +/* Treat space at ptr + offset as a chunk */ +#define chunk_at_offset(p, s) ((mchunkptr)(((char *)(p)) + (s))) + +/* extract p's inuse bit */ +#define inuse(p) \ + ((((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE) + +/* set/clear chunk as being inuse without otherwise disturbing */ +#define set_inuse(p) \ + ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE + +#define clear_inuse(p) \ + ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE) + +/* check/set/clear inuse bits in known places */ +#define inuse_bit_at_offset(p, s) \ + (((mchunkptr)(((char *)(p)) + (s)))->size & PREV_INUSE) + +#define set_inuse_bit_at_offset(p, s) \ + (((mchunkptr)(((char *)(p)) + (s)))->size |= PREV_INUSE) + +#define clear_inuse_bit_at_offset(p, s) \ + (((mchunkptr)(((char *)(p)) + (s)))->size &= ~(PREV_INUSE)) + +/* Set size at head, without disturbing its use bit */ +#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s))) + +/* Set size/use field */ +#define set_head(p, s) ((p)->size = (s)) + +/* Set size at footer (only when chunk is not in use) */ +#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_size = (s)) + +/* + -------------------- Internal data structures -------------------- + + All internal state is held in an instance of malloc_state defined + below. There are no other static variables, except in two optional + cases: + * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. + * If HAVE_MMAP is true, but mmap doesn't support + MAP_ANONYMOUS, a dummy file descriptor for mmap. + + Beware of lots of tricks that minimize the total bookkeeping space + requirements. The result is a little over 1K bytes (for 4byte + pointers and size_t.) +*/ + +/* + Bins + + An array of bin headers for free chunks. Each bin is doubly + linked. The bins are approximately proportionally (log) spaced. + There are a lot of these bins (128). This may look excessive, but + works very well in practice. Most bins hold sizes that are + unusual as malloc request sizes, but are more usual for fragments + and consolidated sets of chunks, which is what these bins hold, so + they can be found quickly. All procedures maintain the invariant + that no consolidated chunk physically borders another one, so each + chunk in a list is known to be preceeded and followed by either + inuse chunks or the ends of memory. + + Chunks in bins are kept in size order, with ties going to the + approximately least recently used chunk. Ordering isn't needed + for the small bins, which all contain the same-sized chunks, but + facilitates best-fit allocation for larger chunks. These lists + are just sequential. Keeping them in order almost never requires + enough traversal to warrant using fancier ordered data + structures. + + Chunks of the same size are linked with the most + recently freed at the front, and allocations are taken from the + back. This results in LRU (FIFO) allocation order, which tends + to give each chunk an equal opportunity to be consolidated with + adjacent freed chunks, resulting in larger free chunks and less + fragmentation. + + To simplify use in double-linked lists, each bin header acts + as a malloc_chunk. This avoids special-casing for headers. 
+ But to conserve space and improve locality, we allocate + only the fd/bk pointers of bins, and then use repositioning tricks + to treat these as the fields of a malloc_chunk*. +*/ + +typedef struct malloc_chunk *mbinptr; + +/* addressing -- note that bin_at(0) does not exist */ +#define bin_at(m, i) ((mbinptr)((char *)&((m)->bins[(i) << 1]) - (SIZE_SZ << 1))) + +/* analog of ++bin */ +#define next_bin(b) ((mbinptr)((char *)(b) + (sizeof(mchunkptr) << 1))) + +/* Reminders about list directionality within bins */ +#define first(b) ((b)->fd) +#define last(b) ((b)->bk) + +/* Take a chunk off a bin list */ +#define unlink(P, BK, FD) \ + { \ + FD = P->fd; \ + BK = P->bk; \ + FD->bk = BK; \ + BK->fd = FD; \ + } + +/* + Indexing + + Bins for sizes < 512 bytes contain chunks of all the same size, spaced + 8 bytes apart. Larger bins are approximately logarithmically spaced: + + 64 bins of size 8 + 32 bins of size 64 + 16 bins of size 512 + 8 bins of size 4096 + 4 bins of size 32768 + 2 bins of size 262144 + 1 bin of size what's left + + The bins top out around 1MB because we expect to service large + requests via mmap. +*/ + +#define NBINS 96 +#define NSMALLBINS 32 +#define SMALLBIN_WIDTH 8 +#define MIN_LARGE_SIZE 256 + +#define in_smallbin_range(sz) \ + ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE) + +#define smallbin_index(sz) (((unsigned)(sz)) >> 3) + +/* + Compute index for size. We expect this to be inlined when + compiled with optimization, else not, which works out well. +*/ +static int largebin_index(unsigned int sz) { + unsigned int x = sz >> SMALLBIN_WIDTH; + unsigned int m; /* bit position of highest set bit of m */ + + if (x >= 0x10000) { + return NBINS - 1; + } + + /* On intel, use BSRL instruction to find highest bit */ +#if defined(__GNUC__) && defined(i386) + + __asm__("bsrl %1,%0\n\t" + : "=r"(m) + : "g"(x)); + +#else + { + /* + Based on branch-free nlz algorithm in chapter 5 of Henry + S. Warren Jr's book "Hacker's Delight". + */ + + unsigned int n = ((x - 0x100) >> 16) & 8; + x <<= n; + m = ((x - 0x1000) >> 16) & 4; + n += m; + x <<= m; + m = ((x - 0x4000) >> 16) & 2; + n += m; + x = (x << m) >> 14; + m = 13 - n + (x & ~(x >> 1)); + } +#endif + + /* Use next 2 bits to create finer-granularity bins */ + return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3); +} + +#define bin_index(sz) \ + ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) + +/* + FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the + first bin that is maintained in sorted order. This must + be the smallest size corresponding to a given bin. + + Normally, this should be MIN_LARGE_SIZE. But you can weaken + best fit guarantees to sometimes speed up malloc by increasing value. + Doing this means that malloc may choose a chunk that is + non-best-fitting by up to the width of the bin. + + Some useful cutoff values: + 512 - all bins sorted + 2560 - leaves bins <= 64 bytes wide unsorted + 12288 - leaves bins <= 512 bytes wide unsorted + 65536 - leaves bins <= 4096 bytes wide unsorted + 262144 - leaves bins <= 32768 bytes wide unsorted + -1 - no bins sorted (not recommended!) +*/ + +#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE +/* #define FIRST_SORTED_BIN_SIZE 65536 */ + +/* + Unsorted chunks + + All remainders from chunk splits, as well as all returned chunks, + are first placed in the "unsorted" bin. They are then placed + in regular bins after malloc gives them ONE chance to be used before + binning. 
So, basically, the unsorted_chunks list acts as a queue, + with chunks being placed on it in free (and malloc_consolidate), + and taken off (to be either used or placed in bins) in malloc. +*/ + +/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ +#define unsorted_chunks(M) (bin_at(M, 1)) + +/* + Top + + The top-most available chunk (i.e., the one bordering the end of + available memory) is treated specially. It is never included in + any bin, is used only if no other chunk is available, and is + released back to the system if it is very large (see + M_TRIM_THRESHOLD). Because top initially + points to its own bin with initial zero size, thus forcing + extension on the first malloc request, we avoid having any special + code in malloc to check whether it even exists yet. But we still + need to do so when getting memory from system, so we make + initial_top treat the bin as a legal but unusable chunk during the + interval between initialization and the first call to + sYSMALLOc. (This is somewhat delicate, since it relies on + the 2 preceding words to be zero during this interval as well.) +*/ + +/* Conveniently, the unsorted bin can be used as dummy top on first call */ +#define initial_top(M) (unsorted_chunks(M)) + +/* + Binmap + + To help compensate for the large number of bins, a one-level index + structure is used for bin-by-bin searching. `binmap' is a + bitvector recording whether bins are definitely empty so they can + be skipped over during traversals. The bits are NOT always + cleared as soon as bins are empty, but instead only + when they are noticed to be empty during traversal in malloc. +*/ + +/* Conservatively use 32 bits per map word, even if on 64bit system */ +#define BINMAPSHIFT 5 +#define BITSPERMAP (1U << BINMAPSHIFT) +#define BINMAPSIZE (NBINS / BITSPERMAP) + +#define idx2block(i) ((i) >> BINMAPSHIFT) +#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1)))) + +#define mark_bin(m, i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) +#define unmark_bin(m, i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) +#define get_binmap(m, i) ((m)->binmap[idx2block(i)] & idx2bit(i)) + +/* + Fastbins + + An array of lists holding recently freed small chunks. Fastbins + are not doubly linked. It is faster to single-link them, and + since chunks are never removed from the middles of these lists, + double linking is not necessary. Also, unlike regular bins, they + are not even processed in FIFO order (they use faster LIFO) since + ordering doesn't much matter in the transient contexts in which + fastbins are normally used. + + Chunks in fastbins keep their inuse bit set, so they cannot + be consolidated with other free chunks. malloc_consolidate + releases all chunks in fastbins and consolidates them with + other free chunks. +*/ + +typedef struct malloc_chunk *mfastbinptr; + +/* offset 2 to use otherwise unindexable first 2 bins */ +#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) + +/* The maximum fastbin request size we support */ +#define MAX_FAST_SIZE 80 + +#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE)) + 1) + +/* + FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() + that triggers automatic consolidation of possibly-surrounding + fastbin chunks. This is a heuristic, so the exact value should not + matter too much. It is defined at half the default trim threshold as a + compromise heuristic to only attempt consolidation if it is likely + to lead to trimming. However, it is not dynamically tunable, since + consolidation reduces fragmentation surrounding large chunks even + if trimming is not used. +*/ + +#define FASTBIN_CONSOLIDATION_THRESHOLD \ + ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
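/*
  (Aside: a minimal, self-contained sketch of the singly-linked LIFO
  discipline the fastbins described above use. toy_node, toy_push and
  toy_pop are illustrative names, not part of this file.

      #include <stdio.h>

      struct toy_node { struct toy_node *fd; };   // fastbins link through fd only

      static struct toy_node *toy_bin;            // one fastbin head

      static void toy_push(struct toy_node *p) {  // free side: O(1) push at head
          p->fd = toy_bin;
          toy_bin = p;
      }

      static struct toy_node *toy_pop(void) {     // malloc side: O(1) pop at head
          struct toy_node *p = toy_bin;
          if (p) { toy_bin = p->fd; }
          return p;
      }

      int main(void) {
          struct toy_node a, b;
          toy_push(&a);
          toy_push(&b);
          printf("%d\n", toy_pop() == &b);  // 1: last freed, first reused (LIFO)
          return 0;
      }

  Because chunks are only ever pushed and popped at the head, no bk pointer
  and no mid-list unlink is ever needed, which is why fastbins can stay
  singly linked.)
*/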
+ +/* + Since the lowest 2 bits in max_fast don't matter in size comparisons, + they are used as flags. +*/ + +/* + ANYCHUNKS_BIT held in max_fast indicates that there may be any + freed chunks at all. It is set true when entering a chunk into any + bin. +*/ + +#define ANYCHUNKS_BIT (1U) + +#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT)) +#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT) +#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT) + +/* + FASTCHUNKS_BIT held in max_fast indicates that there are probably + some fastbin chunks. It is set true on entering a chunk into any + fastbin, and cleared only in malloc_consolidate. +*/ + +#define FASTCHUNKS_BIT (2U) + +#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT)) +#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT | ANYCHUNKS_BIT)) +#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT)) + +/* + Set value of max_fast. + Use impossibly small value if 0. +*/ + +#define set_max_fast(M, s) \ + (M)->max_fast = (((s) == 0) ? SMALLBIN_WIDTH : request2size(s)) | \ + ((M)->max_fast & (FASTCHUNKS_BIT | ANYCHUNKS_BIT)) + +#define get_max_fast(M) \ + ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT)) + +/* + morecore_properties is a status word holding dynamically discovered + or controlled properties of the morecore function +*/ + +#define MORECORE_CONTIGUOUS_BIT (1U) + +#define contiguous(M) \ + (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT)) +#define noncontiguous(M) \ + (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0) +#define set_contiguous(M) \ + ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT) +#define set_noncontiguous(M) \ + ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT) + +/* + ----------- Internal state representation and initialization ----------- +*/ + +struct malloc_state { + + /* The maximum chunk size to be eligible for fastbin */ + INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ + + /* Fastbins */ + mfastbinptr fastbins[NFASTBINS]; + + /* Base of the topmost chunk -- not otherwise kept in a bin */ + mchunkptr top; + + /* The remainder from the most recent split of a small request */ + mchunkptr last_remainder; + + /* Normal bins packed as described above */ + mchunkptr bins[NBINS * 2]; + + /* Bitmap of bins. Trailing zero map handles cases of largest binned size */ + unsigned int binmap[BINMAPSIZE + 1]; + + /* Tunable parameters */ + CHUNK_SIZE_T trim_threshold; + INTERNAL_SIZE_T top_pad; + INTERNAL_SIZE_T mmap_threshold; + + /* Memory map support */ + int n_mmaps; + int n_mmaps_max; + int max_n_mmaps; + + /* Cache malloc_getpagesize */ + unsigned int pagesize; + + /* Track properties of MORECORE */ + unsigned int morecore_properties; + + /* Statistics */ + INTERNAL_SIZE_T mmapped_mem; + INTERNAL_SIZE_T sbrked_mem; + INTERNAL_SIZE_T max_sbrked_mem; + INTERNAL_SIZE_T max_mmapped_mem; + INTERNAL_SIZE_T max_total_mem; +}; + +typedef struct malloc_state *mstate; + +/* + There is exactly one instance of this struct in this malloc. + If you are adapting this malloc in a way that does NOT use a static + malloc_state, you MUST explicitly zero-fill it before using. This + malloc relies on the property that malloc_state is initialized to + all zeroes (as is true of C statics). +*/
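/*
  (Aside: a minimal sketch of the bit packing performed by set_max_fast and
  get_max_fast above. Chunk sizes produced by request2size are multiples of
  at least 8 here, so the two low bits of max_fast are always zero and can
  carry ANYCHUNKS_BIT and FASTCHUNKS_BIT; all names below are local to the
  sketch.

      #include <stdio.h>

      int main(void) {
          unsigned long max_fast = 0;
          // set_max_fast: store a size of 72 while preserving the flag bits
          max_fast = 72ul | (max_fast & 3ul);
          // set_fastchunks: FASTCHUNKS_BIT | ANYCHUNKS_BIT
          max_fast |= 3ul;
          printf("size  = %lu\n", max_fast & ~3ul);  // 72 (get_max_fast)
          printf("flags = %lu\n", max_fast & 3ul);   // 3
          return 0;
      }
  )
*/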
+ +static struct malloc_state av_; /* never directly referenced */ + +/* + All uses of av_ are via get_malloc_state(). + At most one "call" to get_malloc_state is made per invocation of + the public versions of malloc and free, but other routines + that in turn invoke malloc and/or free may call more than once. + Also, it is called in check* routines if DL_DEBUG is set. +*/ + +#define get_malloc_state() (&(av_)) + +/* + Initialize a malloc_state struct. + + This is called only from within malloc_consolidate, which needs + to be called in the same contexts anyway. It is never called directly + outside of malloc_consolidate because some optimizing compilers try + to inline it at all call points, which turns out not to be an + optimization at all. (Inlining it in malloc_consolidate is fine though.) +*/ + +#if __STD_C +static void malloc_init_state(mstate av) +#else +static void malloc_init_state(av) mstate av; +#endif +{ + int i; + mbinptr bin; + + /* Establish circular links for normal bins */ + for (i = 1; i < NBINS; ++i) { + bin = bin_at(av, i); + bin->fd = bin->bk = bin; + } + + av->top_pad = DEFAULT_TOP_PAD; + av->n_mmaps_max = DEFAULT_MMAP_MAX; + av->mmap_threshold = DEFAULT_MMAP_THRESHOLD; + av->trim_threshold = DEFAULT_TRIM_THRESHOLD; + +#if MORECORE_CONTIGUOUS + set_contiguous(av); +#else + set_noncontiguous(av); +#endif + + set_max_fast(av, DEFAULT_MXFAST); + + av->top = initial_top(av); + av->pagesize = malloc_getpagesize; +} + +/* + Other internal utilities operating on mstates +*/ + +#if __STD_C +static Void_t *sYSMALLOc(INTERNAL_SIZE_T, mstate); +static int sYSTRIm(size_t, mstate); +static void malloc_consolidate(mstate); +static Void_t **iALLOc(size_t, size_t *, int, Void_t **); +#else +static Void_t *sYSMALLOc(); +static int sYSTRIm(); +static void malloc_consolidate(); +static Void_t **iALLOc(); +#endif + +/* + Debugging support + + These routines make a number of assertions about the states + of data structures that should be true at all times. If any + are not true, it's very likely that a user program has somehow + trashed memory. (It's also possible that there is a coding error + in malloc. In which case, please report it!) +*/ + +#if !DL_DEBUG + +#define check_chunk(P) +#define check_free_chunk(P) +#define check_inuse_chunk(P) +#define check_remalloced_chunk(P, N) +#define check_malloced_chunk(P, N) +#define check_malloc_state() + +#else +#define check_chunk(P) do_check_chunk(P) +#define check_free_chunk(P) do_check_free_chunk(P) +#define check_inuse_chunk(P) do_check_inuse_chunk(P) +#define check_remalloced_chunk(P, N) do_check_remalloced_chunk(P, N) +#define check_malloced_chunk(P, N) do_check_malloced_chunk(P, N) +#define check_malloc_state() do_check_malloc_state() + +/* + Properties of all chunks +*/ + +#if __STD_C +static void do_check_chunk(mchunkptr p) +#else +static void do_check_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + CHUNK_SIZE_T sz = chunksize(p); + /* min and max possible addresses assuming contiguous allocation */ + char *max_address = (char *)(av->top) + chunksize(av->top); + char *min_address = max_address - av->sbrked_mem; + + if (!chunk_is_mmapped(p)) { + + /* Has legal address ... 
*/ + if (p != av->top) { + if (contiguous(av)) { + assert(((char *)p) >= min_address); + assert(((char *)p + sz) <= ((char *)(av->top))); + } + } else { + /* top size is always at least MINSIZE */ + assert((CHUNK_SIZE_T)(sz) >= MINSIZE); + /* top predecessor always marked inuse */ + assert(prev_inuse(p)); + } + } else { +#if HAVE_MMAP + /* address is outside main heap */ + if (contiguous(av) && av->top != initial_top(av)) { + assert(((char *)p) < min_address || ((char *)p) > max_address); + } + /* chunk is page-aligned */ + assert(((p->prev_size + sz) & (av->pagesize - 1)) == 0); + /* mem is aligned */ + assert(aligned_OK(chunk2mem(p))); +#else + /* force an appropriate assert violation if debug set */ + assert(!chunk_is_mmapped(p)); +#endif + } +} + +/* + Properties of free chunks +*/ + +#if __STD_C +static void do_check_free_chunk(mchunkptr p) +#else +static void do_check_free_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE; + mchunkptr next = chunk_at_offset(p, sz); + + do_check_chunk(p); + + /* Chunk must claim to be free ... */ + assert(!inuse(p)); + assert(!chunk_is_mmapped(p)); + + /* Unless a special marker, must have OK fields */ + if ((CHUNK_SIZE_T)(sz) >= MINSIZE) { + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert(aligned_OK(chunk2mem(p))); + /* ... matching footer field */ + assert(next->prev_size == sz); + /* ... and is fully consolidated */ + assert(prev_inuse(p)); + assert(next == av->top || inuse(next)); + + /* ... and has minimally sane links */ + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } else { /* markers are always of size SIZE_SZ */ + assert(sz == SIZE_SZ); + } +} + +/* + Properties of inuse chunks +*/ + +#if __STD_C +static void do_check_inuse_chunk(mchunkptr p) +#else +static void do_check_inuse_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + mchunkptr next; + do_check_chunk(p); + + if (chunk_is_mmapped(p)) { + return; /* mmapped chunks have no next/prev */ + } + + /* Check whether it claims to be in use ... */ + assert(inuse(p)); + + next = next_chunk(p); + + /* ... and is surrounded by OK chunks. + Since more things can be checked with free chunks than inuse ones, + if an inuse chunk borders them and debug is on, it's worth doing them. + */ + if (!prev_inuse(p)) { + /* Note that we cannot even look at prev unless it is not inuse */ + mchunkptr prv = prev_chunk(p); + assert(next_chunk(prv) == p); + do_check_free_chunk(prv); + } + + if (next == av->top) { + assert(prev_inuse(next)); + assert(chunksize(next) >= MINSIZE); + } else if (!inuse(next)) { + do_check_free_chunk(next); + } +} + +/* + Properties of chunks recycled from fastbins +*/ + +#if __STD_C +static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_remalloced_chunk(p, s) mchunkptr p; +INTERNAL_SIZE_T s; +#endif +{ + INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE; + + do_check_inuse_chunk(p); + + /* Legal size ... */ + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert((CHUNK_SIZE_T)(sz) >= MINSIZE); + /* ... 
and alignment */ + assert(aligned_OK(chunk2mem(p))); + /* chunk is less than MINSIZE more than request */ + assert((long)(sz) - (long)(s) >= 0); + assert((long)(sz) - (long)(s + MINSIZE) < 0); +} + +/* + Properties of nonrecycled chunks at the point they are malloced +*/ + +#if __STD_C +static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_malloced_chunk(p, s) mchunkptr p; +INTERNAL_SIZE_T s; +#endif +{ + /* same as recycled case ... */ + do_check_remalloced_chunk(p, s); + + /* + ... plus, must obey implementation invariant that prev_inuse is + always true of any allocated chunk; i.e., that each allocated + chunk borders either a previously allocated and still in-use + chunk, or the base of its memory arena. This is ensured + by making all allocations from the `lowest' part of any found + chunk. This does not necessarily hold however for chunks + recycled via fastbins. + */ + + assert(prev_inuse(p)); +} + +/* + Properties of malloc_state. + + This may be useful for debugging malloc, as well as detecting user + programmer errors that somehow write into malloc_state. + + If you are extending or experimenting with this malloc, you can + probably figure out how to hack this routine to print out or + display chunk addresses, sizes, bins, and other instrumentation. +*/ + +static void do_check_malloc_state(void) { + mstate av = get_malloc_state(); + int i; + mchunkptr p; + mchunkptr q; + mbinptr b; + unsigned int binbit; + int empty; + unsigned int idx; + INTERNAL_SIZE_T size; + CHUNK_SIZE_T total = 0; + int max_fast_bin; + + /* internal size_t must be no wider than pointer type */ + assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char *)); + + /* alignment is a power of 2 */ + assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0); + + /* cannot run remaining checks until fully initialized */ + if (av->top == 0 || av->top == initial_top(av)) { + return; + } + + /* pagesize is a power of 2 */ + assert((av->pagesize & (av->pagesize - 1)) == 0); + + /* properties of fastbins */ + + /* max_fast is in allowed range */ + assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE)); + + max_fast_bin = fastbin_index(av->max_fast); + + for (i = 0; i < NFASTBINS; ++i) { + p = av->fastbins[i]; + + /* all bins past max_fast are empty */ + if (i > max_fast_bin) { + assert(p == 0); + } + + while (p != 0) { + /* each chunk claims to be inuse */ + do_check_inuse_chunk(p); + total += chunksize(p); + /* chunk belongs in this bin */ + assert(fastbin_index(chunksize(p)) == i); + p = p->fd; + } + } + + if (total != 0) { + assert(have_fastchunks(av)); + } else if (!have_fastchunks(av)) { + assert(total == 0); + } + + /* check normal bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av, i); + + /* binmap is accurate (except for bin 1 == unsorted_chunks) */ + if (i >= 2) { + binbit = get_binmap(av, i); + empty = last(b) == b; + if (!binbit) { + assert(empty); + } else if (!empty) { + assert(binbit); + } + } + + for (p = last(b); p != b; p = p->bk) { + /* each chunk claims to be free */ + do_check_free_chunk(p); + size = chunksize(p); + total += size; + if (i >= 2) { + /* chunk belongs in bin */ + idx = bin_index(size); + assert(idx == i); + /* lists are sorted */ + if ((CHUNK_SIZE_T)size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { + assert(p->bk == b || + (CHUNK_SIZE_T)chunksize(p->bk) >= + (CHUNK_SIZE_T)chunksize(p)); + } + } + /* chunk is followed by a legal chain of inuse chunks */ + for (q = next_chunk(p); + (q != av->top && inuse(q) && + (CHUNK_SIZE_T)(chunksize(q)) >= 
MINSIZE); + q = next_chunk(q)) { + do_check_inuse_chunk(q); + } + } + } + + /* top chunk is OK */ + check_chunk(av->top); + + /* sanity checks for statistics */ + + assert(total <= (CHUNK_SIZE_T)(av->max_total_mem)); + assert(av->n_mmaps >= 0); + assert(av->n_mmaps <= av->max_n_mmaps); + + assert((CHUNK_SIZE_T)(av->sbrked_mem) <= + (CHUNK_SIZE_T)(av->max_sbrked_mem)); + + assert((CHUNK_SIZE_T)(av->mmapped_mem) <= + (CHUNK_SIZE_T)(av->max_mmapped_mem)); + + assert((CHUNK_SIZE_T)(av->max_total_mem) >= + (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem)); +} +#endif + +/* ----------- Routines dealing with system allocation -------------- */ + +/* + sysmalloc handles malloc cases requiring more memory from the system. + On entry, it is assumed that av->top does not have enough + space to service request for nb bytes, thus requiring that av->top + be extended or replaced. +*/ + +#if __STD_C +static Void_t *sYSMALLOc(INTERNAL_SIZE_T nb, mstate av) +#else +static Void_t *sYSMALLOc(nb, av) +INTERNAL_SIZE_T nb; +mstate av; +#endif +{ + mchunkptr old_top; /* incoming value of av->top */ + INTERNAL_SIZE_T old_size; /* its size */ + char *old_end; /* its end address */ + + long size; /* arg to first MORECORE or mmap call */ + char *brk; /* return value from MORECORE */ + + long correction; /* arg to 2nd MORECORE call */ + char *snd_brk; /* 2nd return val */ + + INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ + INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ + char *aligned_brk; /* aligned offset into brk */ + + mchunkptr p; /* the allocated/returned chunk */ + mchunkptr remainder; /* remainder from allocation */ + CHUNK_SIZE_T remainder_size; /* its size */ + + CHUNK_SIZE_T sum; /* for updating stats */ + + size_t pagemask = av->pagesize - 1; + + /* + If there is space available in fastbins, consolidate and retry + malloc from scratch rather than getting memory from system. This + can occur only if nb is in smallbin range so we didn't consolidate + upon entry to malloc. It is much easier to handle this case here + than in malloc proper. + */ + + if (have_fastchunks(av)) { + assert(in_smallbin_range(nb)); + malloc_consolidate(av); + return mALLOc(nb - MALLOC_ALIGN_MASK); + } + +#if HAVE_MMAP + + /* + If have mmap, and the request size meets the mmap threshold, and + the system supports mmap, and there are few enough currently + allocated mmapped regions, try to directly map this request + rather than expanding top. + */ + + if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) && + (av->n_mmaps < av->n_mmaps_max)) { + + char *mm; /* return value from mmap call*/ + + /* + Round up size to nearest page. For mmapped chunks, the overhead + is one SIZE_SZ unit larger than for normal chunks, because there + is no following chunk whose prev_size field could be used. + */ + size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask; + + /* Don't try if size wraps around 0 */ + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) { + + mm = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + if (mm != (char *)(MORECORE_FAILURE)) { + + /* + The offset to the start of the mmapped region is stored + in the prev_size field of the chunk. This allows us to adjust + returned start address to meet alignment requirements here + and in memalign(), and still be able to compute proper + address argument for later munmap in free() and realloc(). 
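   (Aside: a self-contained sketch of that bookkeeping, with plain malloc
   standing in for the mmap call and toy names throughout; it assumes the
   user pointer sits two size_t words past the chunk start, as chunk2mem
   does here.

       #include <stdio.h>
       #include <stdint.h>
       #include <stdlib.h>

       #define TOY_ALIGN_MASK 15u          // pretend the alignment is 16

       int main(void) {
           char *mm = malloc(4096);        // stands in for the mapping
           uintptr_t front =
               ((uintptr_t)(mm + 2 * sizeof(size_t))) & TOY_ALIGN_MASK;
           size_t correction = front ? (TOY_ALIGN_MASK + 1) - front : 0;
           char *chunk = mm + correction;  // aligned chunk start
           ((size_t *)chunk)[0] = correction;  // stored where prev_size lives
           // free/realloc can now recover the base address to unmap:
           printf("base recovered: %d\n",
                  chunk - ((size_t *)chunk)[0] == mm);
           free(mm);
           return 0;
       }

   Storing the offset in the otherwise dead prev_size word costs nothing,
   since an mmapped chunk has no physically preceding neighbor.)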
+ */ + + front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK; + if (front_misalign > 0) { + correction = MALLOC_ALIGNMENT - front_misalign; + p = (mchunkptr)(mm + correction); + p->prev_size = correction; + set_head(p, (size - correction) | IS_MMAPPED); + } else { + p = (mchunkptr)mm; + p->prev_size = 0; + set_head(p, size | IS_MMAPPED); + } + + /* update statistics */ + + if (++av->n_mmaps > av->max_n_mmaps) { + av->max_n_mmaps = av->n_mmaps; + } + + sum = av->mmapped_mem += size; + if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) { + av->max_mmapped_mem = sum; + } + sum += av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { + av->max_total_mem = sum; + } + + check_chunk(p); + + return chunk2mem(p); + } + } + } +#endif + + /* Record incoming configuration of top */ + + old_top = av->top; + old_size = chunksize(old_top); + old_end = (char *)(chunk_at_offset(old_top, old_size)); + + brk = snd_brk = (char *)(MORECORE_FAILURE); + + /* + If not the first time through, we require old_size to be + at least MINSIZE and to have prev_inuse set. + */ + + assert((old_top == initial_top(av) && old_size == 0) || + ((CHUNK_SIZE_T)(old_size) >= MINSIZE && + prev_inuse(old_top))); + + /* Precondition: not enough current space to satisfy nb request */ + assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE)); + + /* Precondition: all fastbins are consolidated */ + assert(!have_fastchunks(av)); + + /* Request enough space for nb + pad + overhead */ + + size = nb + av->top_pad + MINSIZE; + + /* + If contiguous, we can subtract out existing space that we hope to + combine with new space. We add it back later only if + we don't actually get contiguous space. + */ + + if (contiguous(av)) { + size -= old_size; + } + + /* + Round to a multiple of page size. + If MORECORE is not contiguous, this ensures that we only call it + with whole-page arguments. And if MORECORE is contiguous and + this is not first time through, this preserves page-alignment of + previous calls. Otherwise, we correct to page-align below. + */ + + size = (size + pagemask) & ~pagemask; + + /* + Don't try to call MORECORE if argument is so big as to appear + negative. Note that since mmap takes size_t arg, it may succeed + below even if we cannot call MORECORE. + */ + + if (size > 0) { + brk = (char *)(MORECORE(size)); + } + + /* + If have mmap, try using it as a backup when MORECORE fails or + cannot be used. This is worth doing on systems that have "holes" in + address space, so sbrk cannot extend to give contiguous space, but + space is available elsewhere. Note that we ignore mmap max count + and threshold limits, since the space will not be used as a + segregated mmap region. + */ + +#if HAVE_MMAP + if (brk == (char *)(MORECORE_FAILURE)) { + + /* Cannot merge with old top, so add its size back in */ + if (contiguous(av)) { + size = (size + old_size + pagemask) & ~pagemask; + } + + /* If we are relying on mmap as backup, then use larger units */ + if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE)) { + size = MMAP_AS_MORECORE_SIZE; + } + + /* Don't try if size wraps around 0 */ + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) { + + brk = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + if (brk != (char *)(MORECORE_FAILURE)) { + + /* We do not need, and cannot use, another sbrk call to find end */ + snd_brk = brk + size; + + /* + Record that we no longer have a contiguous sbrk region. 
+ + After the first time mmap is used as backup, we do not + ever rely on contiguous space since this could incorrectly + bridge regions. + */ + set_noncontiguous(av); + } + } + } +#endif + + if (brk != (char *)(MORECORE_FAILURE)) { + av->sbrked_mem += size; + + /* + If MORECORE extends previous space, we can likewise extend top size. + */ + + if (brk == old_end && snd_brk == (char *)(MORECORE_FAILURE)) { + set_head(old_top, (size + old_size) | PREV_INUSE); + } + + /* + Otherwise, make adjustments: + + * If the first time through or noncontiguous, we need to call sbrk + just to find out where the end of memory lies. + + * We need to ensure that all returned chunks from malloc will meet + MALLOC_ALIGNMENT + + * If there was an intervening foreign sbrk, we need to adjust sbrk + request size to account for the fact that we will not be able to + combine new space with existing space in old_top. + + * Almost all systems internally allocate whole pages at a time, in + which case we might as well use the whole last page of request. + So we allocate enough more memory to hit a page boundary now, + which in turn causes future contiguous calls to page-align. + */ + + else { + front_misalign = 0; + end_misalign = 0; + correction = 0; + aligned_brk = brk; + + /* + If MORECORE returns an address lower than we have seen before, + we know it isn't really contiguous. This and some subsequent + checks help cope with non-conforming MORECORE functions and + the presence of "foreign" calls to MORECORE from outside of + malloc or by other threads. We cannot guarantee to detect + these in all cases, but cope with the ones we do detect. + */ + if (contiguous(av) && old_size != 0 && brk < old_end) { + set_noncontiguous(av); + } + + /* handle contiguous cases */ + if (contiguous(av)) { + + /* + We can tolerate forward non-contiguities here (usually due + to foreign calls) but treat them as part of our space for + stats reporting. + */ + if (old_size != 0) { + av->sbrked_mem += brk - old_end; + } + + /* Guarantee alignment of first new chunk made from this space */ + + front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; + if (front_misalign > 0) { + + /* + Skip over some bytes to arrive at an aligned position. + We don't need to specially mark these wasted front bytes. + They will never be accessed anyway because + prev_inuse of av->top (and any chunk created from its start) + is always true after initialization. + */ + + correction = MALLOC_ALIGNMENT - front_misalign; + aligned_brk += correction; + } + + /* + If this isn't adjacent to existing space, then we will not + be able to merge with old_top space, so must add to 2nd request. + */ + + correction += old_size; + + /* Extend the end address to hit a page boundary */ + end_misalign = (INTERNAL_SIZE_T)(brk + size + correction); + correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign; + + assert(correction >= 0); + snd_brk = (char *)(MORECORE(correction)); + + if (snd_brk == (char *)(MORECORE_FAILURE)) { + /* + If can't allocate correction, try to at least find out current + brk. It might be enough to proceed without failing. + */ + correction = 0; + snd_brk = (char *)(MORECORE(0)); + } else if (snd_brk < brk) { + /* + If the second call gives noncontiguous space even though + it says it won't, the only course of action is to ignore + results of second call, and conservatively estimate where + the first call left us. Also set noncontiguous, so this + won't happen again, leaving at most one hole. 
+ + Note that this check is intrinsically incomplete. Because + MORECORE is allowed to give more space than we ask for, + there is no reliable way to detect a noncontiguity + producing a forward gap for the second call. + */ + snd_brk = brk + size; + correction = 0; + set_noncontiguous(av); + } + } + + /* handle non-contiguous cases */ + else { + /* MORECORE/mmap must correctly align */ + assert(aligned_OK(chunk2mem(brk))); + + /* Find out current end of memory */ + if (snd_brk == (char *)(MORECORE_FAILURE)) { + snd_brk = (char *)(MORECORE(0)); + av->sbrked_mem += snd_brk - brk - size; + } + } + + /* Adjust top based on results of second sbrk */ + if (snd_brk != (char *)(MORECORE_FAILURE)) { + av->top = (mchunkptr)aligned_brk; + set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE); + av->sbrked_mem += correction; + + /* + If not the first time through, we either have a + gap due to foreign sbrk or a non-contiguous region. Insert a + double fencepost at old_top to prevent consolidation with space + we don't own. These fenceposts are artificial chunks that are + marked as inuse and are in any case too small to use. We need + two to make sizes and alignments work out. + */ + + if (old_size != 0) { + /* + Shrink old_top to insert fenceposts, keeping size a + multiple of MALLOC_ALIGNMENT. We know there is at least + enough space in old_top to do this. + */ + old_size = (old_size - 3 * SIZE_SZ) & ~MALLOC_ALIGN_MASK; + set_head(old_top, old_size | PREV_INUSE); + + /* + Note that the following assignments completely overwrite + old_top when old_size was previously MINSIZE. This is + intentional. We need the fencepost, even if old_top otherwise gets + lost. + */ + chunk_at_offset(old_top, old_size)->size = + SIZE_SZ | PREV_INUSE; + + chunk_at_offset(old_top, old_size + SIZE_SZ)->size = + SIZE_SZ | PREV_INUSE; + + /* + If possible, release the rest, suppressing trimming. + */ + if (old_size >= MINSIZE) { + INTERNAL_SIZE_T tt = av->trim_threshold; + av->trim_threshold = (INTERNAL_SIZE_T)(-1); + fREe(chunk2mem(old_top)); + av->trim_threshold = tt; + } + } + } + } + + /* Update statistics */ + sum = av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem)) { + av->max_sbrked_mem = sum; + } + + sum += av->mmapped_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { + av->max_total_mem = sum; + } + + check_malloc_state(); + + /* finally, do the allocation */ + + p = av->top; + size = chunksize(p); + + /* check that one of the above allocation paths succeeded */ + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + av->top = remainder; + set_head(p, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + check_malloced_chunk(p, nb); + return chunk2mem(p); + } + } + + /* catch all failure paths */ + MALLOC_FAILURE_ACTION; + return 0; +} + +/* + sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back + to the system (via negative arguments to sbrk) if there is unused + memory at the `high' end of the malloc pool. It is called + automatically by free() when top space exceeds the trim + threshold. It is also called by the public malloc_trim routine. It + returns 1 if it actually released any memory, else 0. 
+*/ + +#if __STD_C +static int sYSTRIm(size_t pad, mstate av) +#else +static int sYSTRIm(pad, av) +size_t pad; +mstate av; +#endif +{ + long top_size; /* Amount of top-most memory */ + long extra; /* Amount to release */ + long released; /* Amount actually released */ + char *current_brk; /* address returned by pre-check sbrk call */ + char *new_brk; /* address returned by post-check sbrk call */ + size_t pagesz; + + pagesz = av->pagesize; + top_size = chunksize(av->top); + + /* Release in pagesize units, keeping at least one page */ + extra = ((top_size - pad - MINSIZE + (pagesz - 1)) / pagesz - 1) * pagesz; + + if (extra > 0) { + + /* + Only proceed if end of memory is where we last set it. + This avoids problems if there were foreign sbrk calls. + */ + current_brk = (char *)(MORECORE(0)); + if (current_brk == (char *)(av->top) + top_size) { + + /* + Attempt to release memory. We ignore MORECORE return value, + and instead call again to find out where new end of memory is. + This avoids problems if first call releases less than we asked, + or if failure somehow altered brk value. (We could still + encounter problems if it altered brk in some very bad way, + but the only thing we can do is adjust anyway, which will cause + some downstream failure.) + */ + + MORECORE(-extra); + new_brk = (char *)(MORECORE(0)); + + if (new_brk != (char *)MORECORE_FAILURE) { + released = (long)(current_brk - new_brk); + + if (released != 0) { + /* Success. Adjust top. */ + av->sbrked_mem -= released; + set_head(av->top, (top_size - released) | PREV_INUSE); + check_malloc_state(); + return 1; + } + } + } + } + return 0; +} + +/* + ------------------------------ malloc ------------------------------ +*/ + +#if __STD_C +Void_t *mALLOc(size_t bytes) +#else +Void_t *mALLOc(bytes) +size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T nb; /* normalized request size */ + unsigned int idx; /* associated bin index */ + mbinptr bin; /* associated bin */ + mfastbinptr *fb; /* associated fastbin */ + + mchunkptr victim; /* inspected/selected chunk */ + INTERNAL_SIZE_T size; /* its size */ + int victim_index; /* its bin index */ + + mchunkptr remainder; /* remainder from a split */ + CHUNK_SIZE_T remainder_size; /* its size */ + + unsigned int block; /* bit map traverser */ + unsigned int bit; /* bit map traverser */ + unsigned int map; /* current word of binmap */ + + mchunkptr fwd; /* misc temp for linking */ + mchunkptr bck; /* misc temp for linking */ + + /* + Convert request size to internal form by adding SIZE_SZ bytes + overhead plus possibly more to obtain necessary alignment and/or + to obtain a size of at least MINSIZE, the smallest allocatable + size. Also, checked_request2size traps (returning 0) request sizes + that are so large that they wrap around zero when padded and + aligned. + */ + + checked_request2size(bytes, nb); + + /* + Bypass search if no frees yet + */ + if (!have_anychunks(av)) { + if (av->max_fast == 0) { /* initialization check */ + malloc_consolidate(av); + } + goto use_top; + } + + /* + If the size qualifies as a fastbin, first check corresponding bin. + */ + + if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) { + fb = &(av->fastbins[(fastbin_index(nb))]); + if ((victim = *fb) != 0) { + *fb = victim->fd; + check_remalloced_chunk(victim, nb); + return chunk2mem(victim); + } + }
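/*
  (Aside: a minimal sketch of the normalization checked_request2size
  performs, as described above. The constants mirror a typical 32-bit
  configuration, SIZE_SZ 4, 8-byte alignment, MINSIZE 16, but are local to
  the sketch, not definitions from this file.

      #include <stdio.h>

      #define TOY_SIZE_SZ    4u
      #define TOY_ALIGN_MASK 7u
      #define TOY_MINSIZE    16u

      // returns 0 for requests so large that padding would wrap around zero
      static unsigned toy_request2size(unsigned req) {
          if (req + TOY_SIZE_SZ + TOY_ALIGN_MASK < req) {
              return 0;                                    // overflow trap
          }
          if (req + TOY_SIZE_SZ + TOY_ALIGN_MASK < TOY_MINSIZE) {
              return TOY_MINSIZE;
          }
          return (req + TOY_SIZE_SZ + TOY_ALIGN_MASK) & ~TOY_ALIGN_MASK;
      }

      int main(void) {
          printf("%u\n", toy_request2size(1));   // 16: minimum chunk size
          printf("%u\n", toy_request2size(13));  // 24: 13 + 4 overhead, rounded
          printf("%u\n", toy_request2size(~0u)); // 0: rejected, would wrap
          return 0;
      }
  )
*/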
+ + /* + If a small request, check regular bin. Since these "smallbins" + hold one size each, no searching within bins is necessary. + (For a large request, we need to wait until unsorted chunks are + processed to find best fit. But for small ones, fits are exact + anyway, so we can check now, which is faster.) + */ + + if (in_smallbin_range(nb)) { + idx = smallbin_index(nb); + bin = bin_at(av, idx); + + if ((victim = last(bin)) != bin) { + bck = victim->bk; + set_inuse_bit_at_offset(victim, nb); + bin->bk = bck; + bck->fd = bin; + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + + /* + If this is a large request, consolidate fastbins before continuing. + While it might look excessive to kill all fastbins before + even seeing if there is space available, this avoids + fragmentation problems normally associated with fastbins. + Also, in practice, programs tend to have runs of either small or + large requests, but less often mixtures, so consolidation is not + invoked all that often in most programs. And the programs that + it is called frequently in otherwise tend to fragment. + */ + + else { + idx = largebin_index(nb); + if (have_fastchunks(av)) { + malloc_consolidate(av); + } + } + + /* + Process recently freed or remaindered chunks, taking one only if + it is exact fit, or, if this is a small request, the chunk is remainder from + the most recent non-exact fit. Place other traversed chunks in + bins. Note that this step is the only place in any routine where + chunks are placed in bins. + */ + + while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { + bck = victim->bk; + size = chunksize(victim); + + /* + If a small request, try to use last remainder if it is the + only chunk in unsorted bin. This helps promote locality for + runs of consecutive small requests. This is the only + exception to best-fit, and applies only when there is + no exact fit for a small chunk.
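   (Aside: the payoff is address locality. A toy trace of three 24-byte
   requests carved from one 4096-byte remainder, illustrative numbers only:

       #include <stdio.h>

       int main(void) {
           unsigned base = 0, size = 4096, nb = 24;
           for (int i = 0; i < 3; i++) {
               printf("request %d served at offset %u\n", i, base);
               base += nb;   // victim split off the front of the remainder
               size -= nb;   // what is left stays in the unsorted bin
           }
           printf("remainder left: %u bytes\n", size);  // 4024
           return 0;
       }

   Consecutive small requests thus come back at consecutive addresses.)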
+ */ + + if (in_smallbin_range(nb) && + bck == unsorted_chunks(av) && + victim == av->last_remainder && + (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { + + /* split and reattach remainder */ + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + av->last_remainder = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* remove from unsorted list */ + unsorted_chunks(av)->bk = bck; + bck->fd = unsorted_chunks(av); + + /* Take now instead of binning if exact fit */ + + if (size == nb) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* place chunk in bin */ + + if (in_smallbin_range(size)) { + victim_index = smallbin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + } else { + victim_index = largebin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + + if (fwd != bck) { + /* if smaller than smallest, place first */ + if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) { + fwd = bck; + bck = bck->bk; + } else if ((CHUNK_SIZE_T)(size) >= + (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { + + /* maintain large bins in sorted order */ + size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ + while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) { + fwd = fwd->fd; + } + bck = fwd->bk; + } + } + } + + mark_bin(av, victim_index); + victim->bk = bck; + victim->fd = fwd; + fwd->bk = victim; + bck->fd = victim; + } + + /* + If a large request, scan through the chunks of current bin to + find one that fits. (This will be the smallest that fits unless + FIRST_SORTED_BIN_SIZE has been changed from default.) This is + the only step where an unbounded number of chunks might be + scanned without doing anything useful with them. However the + lists tend to be short. + */ + + if (!in_smallbin_range(nb)) { + bin = bin_at(av, idx); + + for (victim = last(bin); victim != bin; victim = victim->bk) { + size = chunksize(victim); + + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) { + remainder_size = size - nb; + unlink(victim, bck, fwd); + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + } + } + + /* + Search for a chunk by scanning bins, starting with next largest + bin. This search is strictly by best-fit; i.e., the smallest + (with ties going to approximately the least recently used) chunk + that fits is selected. + + The bitmap avoids needing to check that most blocks are nonempty. + */ + + ++idx; + bin = bin_at(av, idx); + block = idx2block(idx); + map = av->binmap[block]; + bit = idx2bit(idx); + + for (;;) { + + /* Skip rest of block if there are no more set bits in this block. 
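      (Aside: the binmap arithmetic in play here, as a toy check; with
      BINMAPSHIFT 5, bin i lives in word i >> 5 at bit i & 31, so for
      example bin 70 sits in block 2 at bit 6:

          #include <stdio.h>

          int main(void) {
              unsigned binmap[3] = {0};           // 96 bins / 32 bits per word
              int i = 70;
              binmap[i >> 5] |= 1u << (i & 31);   // mark_bin
              printf("block %d, set %u\n", i >> 5,
                     (binmap[i >> 5] >> (i & 31)) & 1u);  // block 2, set 1
              return 0;
          }

      The idx2block/idx2bit macros above are exactly this computation.)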
*/ + if (bit > map || bit == 0) { + do { + if (++block >= BINMAPSIZE) { /* out of bins */ + goto use_top; + } + } while ((map = av->binmap[block]) == 0); + + bin = bin_at(av, (block << BINMAPSHIFT)); + bit = 1; + } + + /* Advance to bin with set bit. There must be one. */ + while ((bit & map) == 0) { + bin = next_bin(bin); + bit <<= 1; + assert(bit != 0); + } + + /* Inspect the bin. It is likely to be non-empty */ + victim = last(bin); + + /* If a false alarm (empty bin), clear the bit. */ + if (victim == bin) { + av->binmap[block] = map &= ~bit; /* Write through */ + bin = next_bin(bin); + bit <<= 1; + } + + else { + size = chunksize(victim); + + /* We know the first chunk in this bin is big enough to use. */ + assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)); + + remainder_size = size - nb; + + /* unlink */ + bck = victim->bk; + bin->bk = bck; + bck->fd = bin; + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + /* advertise as last remainder */ + if (in_smallbin_range(nb)) { + av->last_remainder = remainder; + } + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + } + +use_top: + /* + If large enough, split off the chunk bordering the end of memory + (held in av->top). Note that this is in accord with the best-fit + search rule. In effect, av->top is treated as larger (and thus + less well fitting) than any other available chunk since it can + be extended to be as large as necessary (up to system + limitations). + + We require that av->top always exists (i.e., has size >= + MINSIZE) after initialization, so if it would otherwise be + exhausted by current request, it is replenished. (The main + reason for ensuring it exists is that we may need MINSIZE space + to put in fenceposts in sysmalloc.) + */ + + victim = av->top; + size = chunksize(victim); + + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + av->top = remainder; + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* + If no space in top, relay to handle system-dependent cases + */ + return sYSMALLOc(nb, av); +} + +/* + ------------------------------ free ------------------------------ +*/ + +#if __STD_C +void fREe(Void_t *mem) +#else +void fREe(mem) Void_t *mem; +#endif +{ + mstate av = get_malloc_state(); + + mchunkptr p; /* chunk corresponding to mem */ + INTERNAL_SIZE_T size; /* its size */ + mfastbinptr *fb; /* associated fastbin */ + mchunkptr nextchunk; /* next contiguous chunk */ + INTERNAL_SIZE_T nextsize; /* its size */ + int nextinuse; /* true if nextchunk is used */ + INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + /* free(0) has no effect */ + if (mem != 0) { + p = mem2chunk(mem); + size = chunksize(p); + + check_inuse_chunk(p); + + /* + If eligible, place chunk on a fastbin so it can be found + and used quickly in malloc.
+ */ + + if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast) + +#if TRIM_FASTBINS + /* + If TRIM_FASTBINS set, don't place chunks + bordering top into fastbins + */ + && (chunk_at_offset(p, size) != av->top) +#endif + ) { + + set_fastchunks(av); + fb = &(av->fastbins[fastbin_index(size)]); + p->fd = *fb; + *fb = p; + } + + /* + Consolidate other non-mmapped chunks as they arrive. + */ + + else if (!chunk_is_mmapped(p)) { + set_anychunks(av); + + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + /* consolidate backward */ + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long)prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + /* get and clear inuse bit */ + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + set_head(nextchunk, nextsize); + + /* consolidate forward */ + if (!nextinuse) { + unlink(nextchunk, bck, fwd); + size += nextsize; + } + + /* + Place the chunk in unsorted chunk list. Chunks are + not placed into regular bins until after they have + been given one chance to be used in malloc. + */ + + bck = unsorted_chunks(av); + fwd = bck->fd; + p->bk = bck; + p->fd = fwd; + bck->fd = p; + fwd->bk = p; + + set_head(p, size | PREV_INUSE); + set_foot(p, size); + + check_free_chunk(p); + } + + /* + If the chunk borders the current high end of memory, + consolidate into top + */ + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + check_chunk(p); + } + + /* + If freeing a large space, consolidate possibly-surrounding + chunks. Then, if the total unused topmost memory exceeds trim + threshold, ask malloc_trim to reduce top. + + Unless max_fast is 0, we don't know if there are fastbins + bordering top, so we cannot tell for sure whether threshold + has been reached unless fastbins are consolidated. But we + don't want to consolidate on each free. As a compromise, + consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD + is reached. + */ + + if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { + if (have_fastchunks(av)) { + malloc_consolidate(av); + } + +#ifndef MORECORE_CANNOT_TRIM + if ((CHUNK_SIZE_T)(chunksize(av->top)) >= + (CHUNK_SIZE_T)(av->trim_threshold)) { + sYSTRIm(av->top_pad, av); + } +#endif + } + } + /* + If the chunk was allocated via mmap, release via munmap() + Note that if HAVE_MMAP is false but chunk_is_mmapped is + true, then user must have overwritten memory. There's nothing + we can do to catch this error unless DL_DEBUG is set, in which case + check_inuse_chunk (above) will have triggered error. + */ + + else { +#if HAVE_MMAP + INTERNAL_SIZE_T offset = p->prev_size; + av->n_mmaps--; + av->mmapped_mem -= (size + offset); + munmap((char *)p - offset, size + offset); +#endif + } + } +} + +/* + ------------------------- malloc_consolidate ------------------------- + + malloc_consolidate is a specialized version of free() that tears + down chunks held in fastbins. Free itself cannot be used for this + purpose since, among other things, it might place chunks back onto + fastbins. So, instead, we need to use a minor variant of the same + code. + + Also, because this routine needs to be called the first time through + malloc anyway, it turns out to be the perfect place to trigger + initialization code. 
+*/ + +#if __STD_C +static void malloc_consolidate(mstate av) +#else +static void malloc_consolidate(av) mstate av; +#endif +{ + mfastbinptr *fb; /* current fastbin being consolidated */ + mfastbinptr *maxfb; /* last fastbin (for loop control) */ + mchunkptr p; /* current chunk being consolidated */ + mchunkptr nextp; /* next chunk to consolidate */ + mchunkptr unsorted_bin; /* bin header */ + mchunkptr first_unsorted; /* chunk to link to */ + + /* These have same use as in free() */ + mchunkptr nextchunk; + INTERNAL_SIZE_T size; + INTERNAL_SIZE_T nextsize; + INTERNAL_SIZE_T prevsize; + int nextinuse; + mchunkptr bck; + mchunkptr fwd; + + /* + If max_fast is 0, we know that av hasn't + yet been initialized, in which case do so below + */ + + if (av->max_fast != 0) { + clear_fastchunks(av); + + unsorted_bin = unsorted_chunks(av); + + /* + Remove each chunk from fast bin and consolidate it, placing it + then in unsorted bin. Among other reasons for doing this, + placing in unsorted bin avoids needing to calculate actual bins + until malloc is sure that chunks aren't immediately going to be + reused anyway. + */ + + maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); + fb = &(av->fastbins[0]); + do { + if ((p = *fb) != 0) { + *fb = 0; + + do { + check_inuse_chunk(p); + nextp = p->fd; + + /* Slightly streamlined version of consolidation code in free() */ + size = p->size & ~PREV_INUSE; + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long)prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + set_head(nextchunk, nextsize); + + if (!nextinuse) { + size += nextsize; + unlink(nextchunk, bck, fwd); + } + + first_unsorted = unsorted_bin->fd; + unsorted_bin->fd = p; + first_unsorted->bk = p; + + set_head(p, size | PREV_INUSE); + p->bk = unsorted_bin; + p->fd = first_unsorted; + set_foot(p, size); + } + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + } + + } while ((p = nextp) != 0); + } + } while (fb++ != maxfb); + } else { + malloc_init_state(av); + check_malloc_state(); + } +} + +/* + ------------------------------ realloc ------------------------------ +*/ + +#if __STD_C +Void_t *rEALLOc(Void_t *oldmem, size_t bytes) +#else +Void_t *rEALLOc(oldmem, bytes) +Void_t *oldmem; +size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T nb; /* padded request size */ + + mchunkptr oldp; /* chunk corresponding to oldmem */ + INTERNAL_SIZE_T oldsize; /* its size */ + + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + Void_t *newmem; /* corresponding user mem */ + + mchunkptr next; /* next contiguous chunk after oldp */ + + mchunkptr remainder; /* extra space at end of newp */ + CHUNK_SIZE_T remainder_size; /* its size */ + + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + CHUNK_SIZE_T copysize; /* bytes to copy */ + unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ + INTERNAL_SIZE_T *s; /* copy source */ + INTERNAL_SIZE_T *d; /* copy destination */ + +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + fREe(oldmem); + return 0; + } +#endif + + /* realloc of null is supposed to be same as malloc */ + if (oldmem == 0) { + return mALLOc(bytes); + } + + checked_request2size(bytes, nb); + + oldp = mem2chunk(oldmem); + oldsize = chunksize(oldp); + + check_inuse_chunk(oldp); + 
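/*
  (Editor's aside, summarizing the non-mmapped path that follows; this
  pseudo-code is a compressed sketch of the branches below, not additional
  logic:

      if oldsize >= nb              ->  reuse oldp; any tail is split off below
      else if next is top and fits  ->  absorb top, advance av->top, return
      else if next is free and fits ->  unlink(next), absorb it in place
      else                          ->  malloc new, copy, free old

  Only the final branch may move the data, and even it skips the copy when
  the newly malloced chunk happens to directly follow the old one.)
*/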
+ if (!chunk_is_mmapped(oldp)) { + + if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) { + /* already big enough; split below */ + newp = oldp; + newsize = oldsize; + } + + else { + next = chunk_at_offset(oldp, oldsize); + + /* Try to expand forward into top */ + if (next == av->top && + (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= + (CHUNK_SIZE_T)(nb + MINSIZE)) { + set_head_size(oldp, nb); + av->top = chunk_at_offset(oldp, nb); + set_head(av->top, (newsize - nb) | PREV_INUSE); + return chunk2mem(oldp); + } + + /* Try to expand forward into next chunk; split off remainder below */ + else if (next != av->top && + !inuse(next) && + (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= + (CHUNK_SIZE_T)(nb)) { + newp = oldp; + unlink(next, bck, fwd); + } + + /* allocate, copy, free */ + else { + newmem = mALLOc(nb - MALLOC_ALIGN_MASK); + if (newmem == 0) { + return 0; /* propagate failure */ + } + + newp = mem2chunk(newmem); + newsize = chunksize(newp); + + /* + Avoid copy if newp is next chunk after oldp. + */ + if (newp == next) { + newsize += oldsize; + newp = oldp; + } else { + /* + Unroll copy of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + copysize = oldsize - SIZE_SZ; + s = (INTERNAL_SIZE_T *)(oldmem); + d = (INTERNAL_SIZE_T *)(newmem); + ncopies = copysize / sizeof(INTERNAL_SIZE_T); + assert(ncopies >= 3); + + if (ncopies > 9) { + MALLOC_COPY(d, s, copysize); + } + + else { + *(d + 0) = *(s + 0); + *(d + 1) = *(s + 1); + *(d + 2) = *(s + 2); + if (ncopies > 4) { + *(d + 3) = *(s + 3); + *(d + 4) = *(s + 4); + if (ncopies > 6) { + *(d + 5) = *(s + 5); + *(d + 6) = *(s + 6); + if (ncopies > 8) { + *(d + 7) = *(s + 7); + *(d + 8) = *(s + 8); + } + } + } + } + + fREe(oldmem); + check_inuse_chunk(newp); + return chunk2mem(newp); + } + } + } + + /* If possible, free extra space in old or extended chunk */ + + assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb)); + + remainder_size = newsize - nb; + + if (remainder_size < MINSIZE) { /* not enough extra to split off */ + set_head_size(newp, newsize); + set_inuse_bit_at_offset(newp, newsize); + } else { /* split remainder */ + remainder = chunk_at_offset(newp, nb); + set_head_size(newp, nb); + set_head(remainder, remainder_size | PREV_INUSE); + /* Mark remainder as inuse so free() won't complain */ + set_inuse_bit_at_offset(remainder, remainder_size); + fREe(chunk2mem(remainder)); + } + + check_inuse_chunk(newp); + return chunk2mem(newp); + } + + /* + Handle mmap cases + */ + + else { +#if HAVE_MMAP + +#if HAVE_MREMAP + INTERNAL_SIZE_T offset = oldp->prev_size; + size_t pagemask = av->pagesize - 1; + char *cp; + CHUNK_SIZE_T sum; + + /* Note the extra SIZE_SZ overhead */ + newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; + + /* don't need to remap if still within same page */ + if (oldsize == newsize - offset) { + return oldmem; + } + + cp = (char *)mremap((char *)oldp - offset, oldsize + offset, newsize, 1); + + if (cp != (char *)MORECORE_FAILURE) { + + newp = (mchunkptr)(cp + offset); + set_head(newp, (newsize - offset) | IS_MMAPPED); + + assert(aligned_OK(chunk2mem(newp))); + assert((newp->prev_size == offset)); + + /* update statistics */ + sum = av->mmapped_mem += newsize - oldsize; + if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) { + av->max_mmapped_mem = sum; + } + sum += av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { + av->max_total_mem = sum; + } + + return chunk2mem(newp); + } +#endif + + /* Note the extra 
SIZE_SZ overhead. */ + if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) { + newmem = oldmem; /* do nothing */ + } else { + /* Must alloc, copy, free. */ + newmem = mALLOc(nb - MALLOC_ALIGN_MASK); + if (newmem != 0) { + MALLOC_COPY(newmem, oldmem, oldsize - 2 * SIZE_SZ); + fREe(oldmem); + } + } + return newmem; + +#else + /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ + check_malloc_state(); + MALLOC_FAILURE_ACTION; + return 0; +#endif + } +} + +/* + ------------------------------ memalign ------------------------------ +*/ + +#if __STD_C +Void_t *mEMALIGn(size_t alignment, size_t bytes) +#else +Void_t *mEMALIGn(alignment, bytes) +size_t alignment; +size_t bytes; +#endif +{ + INTERNAL_SIZE_T nb; /* padded request size */ + char *m; /* memory returned by malloc call */ + mchunkptr p; /* corresponding chunk */ + char *brk; /* alignment point within p */ + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ + mchunkptr remainder; /* spare room at end to split off */ + CHUNK_SIZE_T remainder_size; /* its size */ + INTERNAL_SIZE_T size; + + /* If need less alignment than we give anyway, just relay to malloc */ + + if (alignment <= MALLOC_ALIGNMENT) { + return mALLOc(bytes); + } + + /* Otherwise, ensure that it is at least a minimum chunk size */ + + if (alignment < MINSIZE) { + alignment = MINSIZE; + } + + /* Make sure alignment is power of 2 (in case MINSIZE is not). */ + if ((alignment & (alignment - 1)) != 0) { + size_t a = MALLOC_ALIGNMENT * 2; + while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) { + a <<= 1; + } + alignment = a; + } + + checked_request2size(bytes, nb); + + /* + Strategy: find a spot within that chunk that meets the alignment + request, and then possibly free the leading and trailing space. + */ + + /* Call malloc with worst case padding to hit alignment. */ + + m = (char *)(mALLOc(nb + alignment + MINSIZE)); + + if (m == 0) { + return 0; /* propagate failure */ + } + + p = mem2chunk(m); + + if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */ + + /* + Find an aligned spot inside chunk. Since we need to give back + leading space in a chunk of at least MINSIZE, if the first + calculation places us at a spot with less than MINSIZE leader, + we can move to the next aligned spot -- we've allocated enough + total room so that this is always possible. 
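   (Aside: the aligned spot is found with the usual two's-complement
   round-up. A self-contained toy check, assuming alignment is a power of
   two and using illustrative values only:

       #include <stdio.h>
       #include <stdint.h>

       int main(void) {
           uintptr_t m = 1003, alignment = 64;
           uintptr_t up = (m + alignment - 1) & -(intptr_t)alignment;
           printf("%lu\n", (unsigned long)up);  // 1024
           return 0;
       }

   This is the same computation the mem2chunk(...) expression below applies
   to the malloced pointer.)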
+ */ + + brk = (char *)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) & + -((signed long)alignment))); + if ((CHUNK_SIZE_T)(brk - (char *)(p)) < MINSIZE) { + brk += alignment; + } + + newp = (mchunkptr)brk; + leadsize = brk - (char *)(p); + newsize = chunksize(p) - leadsize; + + /* For mmapped chunks, just adjust offset */ + if (chunk_is_mmapped(p)) { + newp->prev_size = p->prev_size + leadsize; + set_head(newp, newsize | IS_MMAPPED); + return chunk2mem(newp); + } + + /* Otherwise, give back leader, use the rest */ + set_head(newp, newsize | PREV_INUSE); + set_inuse_bit_at_offset(newp, newsize); + set_head_size(p, leadsize); + fREe(chunk2mem(p)); + p = newp; + + assert(newsize >= nb && + (((PTR_UINT)(chunk2mem(p))) % alignment) == 0); + } + + /* Also give back spare room at the end */ + if (!chunk_is_mmapped(p)) { + size = chunksize(p); + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + set_head(remainder, remainder_size | PREV_INUSE); + set_head_size(p, nb); + fREe(chunk2mem(remainder)); + } + } + + check_inuse_chunk(p); + return chunk2mem(p); +} + +/* + ------------------------------ calloc ------------------------------ +*/ + +#if __STD_C +Void_t *cALLOc(size_t n_elements, size_t elem_size) +#else +Void_t *cALLOc(n_elements, elem_size) +size_t n_elements; +size_t elem_size; +#endif +{ + mchunkptr p; + CHUNK_SIZE_T clearsize; + CHUNK_SIZE_T nclears; + INTERNAL_SIZE_T *d; + + Void_t *mem = mALLOc(n_elements * elem_size); + + if (mem != 0) { + p = mem2chunk(mem); + + if (!chunk_is_mmapped(p)) { + /* + Unroll clear of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + d = (INTERNAL_SIZE_T *)mem; + clearsize = chunksize(p) - SIZE_SZ; + nclears = clearsize / sizeof(INTERNAL_SIZE_T); + assert(nclears >= 3); + + if (nclears > 9) { + MALLOC_ZERO(d, clearsize); + } + + else { + *(d + 0) = 0; + *(d + 1) = 0; + *(d + 2) = 0; + if (nclears > 4) { + *(d + 3) = 0; + *(d + 4) = 0; + if (nclears > 6) { + *(d + 5) = 0; + *(d + 6) = 0; + if (nclears > 8) { + *(d + 7) = 0; + *(d + 8) = 0; + } + } + } + } + } +#if !MMAP_CLEARS + else { + d = (INTERNAL_SIZE_T *)mem; + /* + Note the additional SIZE_SZ + */ + clearsize = chunksize(p) - 2 * SIZE_SZ; + MALLOC_ZERO(d, clearsize); + } +#endif + } + return mem; +} + +/* + ------------------------------ cfree ------------------------------ +*/ + +#if __STD_C +void cFREe(Void_t *mem) +#else +void cFREe(mem) Void_t *mem; +#endif +{ + fREe(mem); +} + +/* + ------------------------- independent_calloc ------------------------- +*/ + +#if __STD_C +Void_t **iCALLOc(size_t n_elements, size_t elem_size, Void_t *chunks[]) +#else +Void_t **iCALLOc(n_elements, elem_size, chunks) +size_t n_elements; +size_t elem_size; +Void_t *chunks[]; +#endif +{ + size_t sz = elem_size; /* serves as 1-element array */ + /* opts arg of 3 means all elements are same size, and should be cleared */ + return iALLOc(n_elements, &sz, 3, chunks); +} + +/* + ------------------------- independent_comalloc ------------------------- +*/ + +#if __STD_C +Void_t **iCOMALLOc(size_t n_elements, size_t sizes[], Void_t *chunks[]) +#else +Void_t **iCOMALLOc(n_elements, sizes, chunks) +size_t n_elements; +size_t sizes[]; +Void_t *chunks[]; +#endif +{ + return iALLOc(n_elements, sizes, 0, chunks); +} + +/* + ------------------------------ ialloc ------------------------------ + ialloc provides common support for independent_X routines, handling all of 
+ the combinations that can result. + + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ + +#if __STD_C +static Void_t **iALLOc(size_t n_elements, + size_t *sizes, + int opts, + Void_t *chunks[]) +#else +static Void_t **iALLOc(n_elements, sizes, opts, chunks) +size_t n_elements; +size_t *sizes; +int opts; +Void_t *chunks[]; +#endif +{ + mstate av = get_malloc_state(); + INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ + INTERNAL_SIZE_T contents_size; /* total size of elements */ + INTERNAL_SIZE_T array_size; /* request size of pointer array */ + Void_t *mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ + Void_t **marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + int mmx; /* to disable mmap */ + INTERNAL_SIZE_T size; + size_t i; + + /* Ensure initialization */ + if (av->max_fast == 0) { + malloc_consolidate(av); + } + + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) { + return chunks; /* nothing to do */ + } + marray = chunks; + array_size = 0; + } else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) { + return (Void_t **)mALLOc(0); + } + marray = 0; + array_size = request2size(n_elements * (sizeof(Void_t *))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) { + contents_size += request2size(sizes[i]); + } + } + + /* subtract out alignment bytes from total to minimize overallocation */ + size = contents_size + array_size - MALLOC_ALIGN_MASK; + + /* + Allocate the aggregate chunk. + But first disable mmap so malloc won't use it, since + we would not be able to later free/realloc space internal + to a segregated mmap region. 
+ */ + mmx = av->n_mmaps_max; /* disable mmap */ + av->n_mmaps_max = 0; + mem = mALLOc(size); + av->n_mmaps_max = mmx; /* reset mmap */ + if (mem == 0) { + return 0; + } + + p = mem2chunk(mem); + assert(!chunk_is_mmapped(p)); + remainder_size = chunksize(p); + + if (opts & 0x2) { /* optionally clear the elements */ + MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + array_chunk = chunk_at_offset(p, contents_size); + marray = (Void_t **)(chunk2mem(array_chunk)); + set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0;; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements - 1) { + if (element_size != 0) { + size = element_size; + } else { + size = request2size(sizes[i]); + } + remainder_size -= size; + set_head(p, size | PREV_INUSE); + p = chunk_at_offset(p, size); + } else { /* the final element absorbs any overallocation slop */ + set_head(p, remainder_size | PREV_INUSE); + break; + } + } + +#if DL_DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(mem2chunk(marray)); + } + + for (i = 0; i != n_elements; ++i) { + check_inuse_chunk(mem2chunk(marray[i])); + } +#endif + + return marray; +} + +/* + ------------------------------ valloc ------------------------------ +*/ + +#if __STD_C +Void_t *vALLOc(size_t bytes) +#else +Void_t *vALLOc(bytes) +size_t bytes; +#endif +{ + /* Ensure initialization */ + mstate av = get_malloc_state(); + if (av->max_fast == 0) { + malloc_consolidate(av); + } + return mEMALIGn(av->pagesize, bytes); +} + +/* + ------------------------------ pvalloc ------------------------------ +*/ + +#if __STD_C +Void_t *pVALLOc(size_t bytes) +#else +Void_t *pVALLOc(bytes) +size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + size_t pagesz; + + /* Ensure initialization */ + if (av->max_fast == 0) { + malloc_consolidate(av); + } + pagesz = av->pagesize; + return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); +} + +/* + ------------------------------ malloc_trim ------------------------------ +*/ + +#if __STD_C +int mTRIm(size_t pad) +#else +int mTRIm(pad) +size_t pad; +#endif +{ + mstate av = get_malloc_state(); + /* Ensure initialization/consolidation */ + malloc_consolidate(av); + +#ifndef MORECORE_CANNOT_TRIM + return sYSTRIm(pad, av); +#else + return 0; +#endif +} + +/* + ------------------------- malloc_usable_size ------------------------- +*/ + +#if __STD_C +size_t mUSABLe(Void_t *mem) +#else +size_t mUSABLe(mem) +Void_t *mem; +#endif +{ + mchunkptr p; + if (mem != 0) { + p = mem2chunk(mem); + if (chunk_is_mmapped(p)) { + return chunksize(p) - 2 * SIZE_SZ; + } else if (inuse(p)) { + return chunksize(p) - SIZE_SZ; + } + } + return 0; +} + +/* + ------------------------------ mallinfo ------------------------------ +*/ + +struct mallinfo mALLINFo() { + mstate av = get_malloc_state(); + struct mallinfo mi; + int i; + mbinptr b; + mchunkptr p; + INTERNAL_SIZE_T avail; + INTERNAL_SIZE_T fastavail; + int nblocks; + int nfastblocks; + + /* Ensure initialization */ + if (av->top == 0) { + malloc_consolidate(av); + } + + check_malloc_state(); + + /* Account for top */ + avail = chunksize(av->top); + nblocks = 1; /* top always exists */ + + /* traverse fastbins */ + 
nfastblocks = 0; + fastavail = 0; + + for (i = 0; NFASTBINS - i > 0; ++i) { + for (p = av->fastbins[i]; p != 0; p = p->fd) { + ++nfastblocks; + fastavail += chunksize(p); + } + } + + avail += fastavail; + + /* traverse regular bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av, i); + for (p = last(b); p != b; p = p->bk) { + ++nblocks; + avail += chunksize(p); + } + } + + mi.smblks = nfastblocks; + mi.ordblks = nblocks; + mi.fordblks = avail; + mi.uordblks = av->sbrked_mem - avail; + mi.arena = av->sbrked_mem; + mi.hblks = av->n_mmaps; + mi.hblkhd = av->mmapped_mem; + mi.fsmblks = fastavail; + mi.keepcost = chunksize(av->top); + mi.usmblks = av->max_total_mem; + return mi; +} + +/* + ------------------------------ malloc_stats ------------------------------ +*/ + +void mSTATs(void) { + struct mallinfo mi = mALLINFo(); + +#ifdef WIN32 + { + CHUNK_SIZE_T free, reserved, committed; + vminfo(&free, &reserved, &committed); + fprintf(stderr, "free bytes = %10lu\n", + free); + fprintf(stderr, "reserved bytes = %10lu\n", + reserved); + fprintf(stderr, "committed bytes = %10lu\n", + committed); + } +#endif + + fprintf(stderr, "max system bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.usmblks)); + fprintf(stderr, "system bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.arena + mi.hblkhd)); + fprintf(stderr, "in use bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd)); + +#ifdef WIN32 + { + CHUNK_SIZE_T kernel, user; + if (cpuinfo(TRUE, &kernel, &user)) { + fprintf(stderr, "kernel ms = %10lu\n", + kernel); + fprintf(stderr, "user ms = %10lu\n", + user); + } + } +#endif +} + +/* + ------------------------------ mallopt ------------------------------ +*/ + +#if __STD_C +int mALLOPt(int param_number, int value) +#else +int mALLOPt(param_number, value) +int param_number; +int value; +#endif +{ + mstate av = get_malloc_state(); + /* Ensure initialization/consolidation */ + malloc_consolidate(av); + + switch (param_number) { + case M_MXFAST: + if (value >= 0 && value <= MAX_FAST_SIZE) { + set_max_fast(av, value); + return 1; + } else { + return 0; + } + + case M_TRIM_THRESHOLD: + av->trim_threshold = value; + return 1; + + case M_TOP_PAD: + av->top_pad = value; + return 1; + + case M_MMAP_THRESHOLD: + av->mmap_threshold = value; + return 1; + + case M_MMAP_MAX: +#if !HAVE_MMAP + if (value != 0) { + return 0; + } +#endif + av->n_mmaps_max = value; + return 1; + + default: + return 0; + } +} + +/* + -------------------- Alternative MORECORE functions -------------------- +*/ + +/* + General Requirements for MORECORE. + + The MORECORE function must have the following properties: + + If MORECORE_CONTIGUOUS is false: + + * MORECORE must allocate in multiples of pagesize. It will + only be called with arguments that are multiples of pagesize. + + * MORECORE(0) must return an address that is at least + MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) + + else (i.e. If MORECORE_CONTIGUOUS is true): + + * Consecutive calls to MORECORE with positive arguments + return increasing addresses, indicating that space has been + contiguously extended. + + * MORECORE need not allocate in multiples of pagesize. + Calls to MORECORE need not have args of multiples of pagesize. + + * MORECORE need not page-align. + + In either case: + + * MORECORE may allocate more memory than requested. (Or even less, + but this will generally result in a malloc failure.) + + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. 
This malloc does NOT call MORECORE(0)
+  until at least one call with positive arguments is made, so
+  the initial value returned is not important.
+
+  * Even though consecutive calls to MORECORE need not return contiguous
+  addresses, it must be OK for malloc'ed chunks to span multiple
+  regions in those cases where they do happen to be contiguous.
+
+  * MORECORE need not handle negative arguments -- it may instead
+  just return MORECORE_FAILURE when given negative arguments.
+  Negative arguments are always multiples of pagesize. MORECORE
+  must not misinterpret negative args as large positive unsigned
+  args. You can suppress all such calls from even occurring by defining
+  MORECORE_CANNOT_TRIM.
+
+  There is some variation across systems about the type of the
+  argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+  actually be size_t, because sbrk supports negative args, so it is
+  normally the signed type of the same width as size_t (sometimes
+  declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much
+  matter though. Internally, we use "long" as arguments, which should
+  work across all reasonable possibilities.
+
+  Additionally, if MORECORE ever returns failure for a positive
+  request, and HAVE_MMAP is true, then mmap is used as a noncontiguous
+  system allocator. This is a useful backup strategy for systems with
+  holes in address spaces -- in this case sbrk cannot contiguously
+  expand the heap, but mmap may be able to map noncontiguous space.
+
+  If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+  a function that always returns MORECORE_FAILURE.
+
+  Malloc only has limited ability to detect failures of MORECORE
+  to supply contiguous space when it says it can. In particular,
+  multithreaded programs that do not use locks may result in
+  race conditions across calls to MORECORE that result in gaps
+  that cannot be detected as such, and subsequent corruption.
+
+  If you are using this malloc with something other than sbrk (or its
+  emulation) to supply memory regions, you probably want to set
+  MORECORE_CONTIGUOUS as false. As an example, here is a custom
+  allocator kindly contributed for pre-OSX macOS. It uses virtually
+  but not necessarily physically contiguous non-paged memory (locked
+  in, present and won't get swapped out). You can use it by
+  uncommenting this section, adding some #includes, and setting up the
+  appropriate defines above:
+
+  #define MORECORE osMoreCore
+  #define MORECORE_CONTIGUOUS 0
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
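+
+  (A minimal sketch of the "always fail" MORECORE mentioned above; the
+  name failMoreCore is illustrative only, not part of this malloc. With
+  HAVE_MMAP true, this routes every request through mmap:
+
+    static void *failMoreCore(long size)
+    {
+      (void) size;
+      return (void *) MORECORE_FAILURE;
+    }
+
+    #define MORECORE failMoreCore
+    #define MORECORE_CONTIGUOUS 0
+
+  The osMoreCore allocator follows.)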
+ + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MORECORE_FAILURE; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MORECORE_FAILURE; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + +/* + -------------------------------------------------------------- + + Emulation of sbrk for win32. + Donated by J. Walter . + For additional information about this code, and malloc on Win32, see + http://www.genesys-e.de/jwalter/ +*/ + +#ifdef WIN32 + +#ifdef _DEBUG +/* #define TRACE */ +#endif + +/* Support for USE_MALLOC_LOCK */ +#ifdef USE_MALLOC_LOCK + +/* Wait for spin lock */ +static int slwait(int *sl) { + while (InterlockedCompareExchange((void **)sl, (void *)1, (void *)0) != 0) { + Sleep(0); + } + return 0; +} + +/* Release spin lock */ +static int slrelease(int *sl) { + InterlockedExchange(sl, 0); + return 0; +} + +#ifdef NEEDED +/* Spin lock for emulation code */ +static int g_sl; +#endif + +#endif /* USE_MALLOC_LOCK */ + +/* getpagesize for windows */ +static long getpagesize(void) { + static long g_pagesize = 0; + if (!g_pagesize) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + g_pagesize = system_info.dwPageSize; + } + return g_pagesize; +} +static long getregionsize(void) { + static long g_regionsize = 0; + if (!g_regionsize) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + g_regionsize = system_info.dwAllocationGranularity; + } + return g_regionsize; +} + +/* A region list entry */ +typedef struct _region_list_entry { + void *top_allocated; + void *top_committed; + void *top_reserved; + long reserve_size; + struct _region_list_entry *previous; +} region_list_entry; + +/* Allocate and link a region entry in the region list */ +static int region_list_append(region_list_entry **last, void *base_reserved, long reserve_size) { + region_list_entry *next = HeapAlloc(GetProcessHeap(), 0, sizeof(region_list_entry)); + if (!next) { + return FALSE; + } + next->top_allocated = (char *)base_reserved; + next->top_committed = (char *)base_reserved; + next->top_reserved = (char *)base_reserved + reserve_size; + next->reserve_size = reserve_size; + next->previous = *last; + *last = next; + return TRUE; +} +/* Free and unlink the last region entry from the region list */ +static int region_list_remove(region_list_entry **last) { + region_list_entry *previous = (*last)->previous; + if (!HeapFree(GetProcessHeap(), sizeof(region_list_entry), *last)) { + return FALSE; + } + *last = previous; + return TRUE; +} + +#define CEIL(size, to) (((size) + (to) - 1) & ~((to) - 1)) +#define FLOOR(size, to) ((size) & ~((to) - 1)) + +#define SBRK_SCALE 0 +/* 
#define SBRK_SCALE 1 */ +/* #define SBRK_SCALE 2 */ +/* #define SBRK_SCALE 4 */ + +/* sbrk for windows */ +static void *sbrk(long size) { + static long g_pagesize, g_my_pagesize; + static long g_regionsize, g_my_regionsize; + static region_list_entry *g_last; + void *result = (void *)MORECORE_FAILURE; +#ifdef TRACE + printf("sbrk %d\n", size); +#endif +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Wait for spin lock */ + slwait(&g_sl); +#endif + /* First time initialization */ + if (!g_pagesize) { + g_pagesize = getpagesize(); + g_my_pagesize = g_pagesize << SBRK_SCALE; + } + if (!g_regionsize) { + g_regionsize = getregionsize(); + g_my_regionsize = g_regionsize << SBRK_SCALE; + } + if (!g_last) { + if (!region_list_append(&g_last, 0, 0)) { + goto sbrk_exit; + } + } + /* Assert invariants */ + assert(g_last); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && + g_last->top_allocated <= g_last->top_committed); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && + g_last->top_committed <= g_last->top_reserved && + (unsigned)g_last->top_committed % g_pagesize == 0); + assert((unsigned)g_last->top_reserved % g_regionsize == 0); + assert((unsigned)g_last->reserve_size % g_regionsize == 0); + /* Allocation requested? */ + if (size >= 0) { + /* Allocation size is the requested size */ + long allocate_size = size; + /* Compute the size to commit */ + long to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; + /* Do we reach the commit limit? */ + if (to_commit > 0) { + /* Round size to commit */ + long commit_size = CEIL(to_commit, g_my_pagesize); + /* Compute the size to reserve */ + long to_reserve = (char *)g_last->top_committed + commit_size - (char *)g_last->top_reserved; + /* Do we reach the reserve limit? */ + if (to_reserve > 0) { + /* Compute the remaining size to commit in the current region */ + long remaining_commit_size = (char *)g_last->top_reserved - (char *)g_last->top_committed; + if (remaining_commit_size > 0) { + /* Assert preconditions */ + assert((unsigned)g_last->top_committed % g_pagesize == 0); + assert(0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); + { + /* Commit this */ + void *base_committed = VirtualAlloc(g_last->top_committed, remaining_commit_size, + MEM_COMMIT, PAGE_READWRITE); + /* Check returned pointer for consistency */ + if (base_committed != g_last->top_committed) { + goto sbrk_exit; + } + /* Assert postconditions */ + assert((unsigned)base_committed % g_pagesize == 0); +#ifdef TRACE + printf("Commit %p %d\n", base_committed, remaining_commit_size); +#endif + /* Adjust the regions commit top */ + g_last->top_committed = (char *)base_committed + remaining_commit_size; + } + } + { + /* Now we are going to search and reserve. 
*/ + int contiguous = -1; + int found = FALSE; + MEMORY_BASIC_INFORMATION memory_info; + void *base_reserved; + long reserve_size; + do { + /* Assume contiguous memory */ + contiguous = TRUE; + /* Round size to reserve */ + reserve_size = CEIL(to_reserve, g_my_regionsize); + /* Start with the current region's top */ + memory_info.BaseAddress = g_last->top_reserved; + /* Assert preconditions */ + assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); + assert(0 < reserve_size && reserve_size % g_regionsize == 0); + while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { + /* Assert postconditions */ + assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); +#ifdef TRACE + printf("Query %p %d %s\n", memory_info.BaseAddress, memory_info.RegionSize, + memory_info.State == MEM_FREE ? "FREE" : (memory_info.State == MEM_RESERVE ? "RESERVED" : (memory_info.State == MEM_COMMIT ? "COMMITTED" : "?"))); +#endif + /* Region is free, well aligned and big enough: we are done */ + if (memory_info.State == MEM_FREE && + (unsigned)memory_info.BaseAddress % g_regionsize == 0 && + memory_info.RegionSize >= (unsigned)reserve_size) { + found = TRUE; + break; + } + /* From now on we can't get contiguous memory! */ + contiguous = FALSE; + /* Recompute size to reserve */ + reserve_size = CEIL(allocate_size, g_my_regionsize); + memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; + /* Assert preconditions */ + assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); + assert(0 < reserve_size && reserve_size % g_regionsize == 0); + } + /* Search failed? */ + if (!found) { + goto sbrk_exit; + } + /* Assert preconditions */ + assert((unsigned)memory_info.BaseAddress % g_regionsize == 0); + assert(0 < reserve_size && reserve_size % g_regionsize == 0); + /* Try to reserve this */ + base_reserved = VirtualAlloc(memory_info.BaseAddress, reserve_size, + MEM_RESERVE, PAGE_NOACCESS); + if (!base_reserved) { + int rc = GetLastError(); + if (rc != ERROR_INVALID_ADDRESS) { + goto sbrk_exit; + } + } + /* A null pointer signals (hopefully) a race condition with another thread. */ + /* In this case, we try again. */ + } while (!base_reserved); + /* Check returned pointer for consistency */ + if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress) { + goto sbrk_exit; + } + /* Assert postconditions */ + assert((unsigned)base_reserved % g_regionsize == 0); +#ifdef TRACE + printf("Reserve %p %d\n", base_reserved, reserve_size); +#endif + /* Did we get contiguous memory? */ + if (contiguous) { + long start_size = (char *)g_last->top_committed - (char *)g_last->top_allocated; + /* Adjust allocation size */ + allocate_size -= start_size; + /* Adjust the regions allocation top */ + g_last->top_allocated = g_last->top_committed; + /* Recompute the size to commit */ + to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; + /* Round size to commit */ + commit_size = CEIL(to_commit, g_my_pagesize); + } + /* Append the new region to the list */ + if (!region_list_append(&g_last, base_reserved, reserve_size)) { + goto sbrk_exit; + } + /* Didn't we get contiguous memory? 
*/ + if (!contiguous) { + /* Recompute the size to commit */ + to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; + /* Round size to commit */ + commit_size = CEIL(to_commit, g_my_pagesize); + } + } + } + /* Assert preconditions */ + assert((unsigned)g_last->top_committed % g_pagesize == 0); + assert(0 < commit_size && commit_size % g_pagesize == 0); + { + /* Commit this */ + void *base_committed = VirtualAlloc(g_last->top_committed, commit_size, + MEM_COMMIT, PAGE_READWRITE); + /* Check returned pointer for consistency */ + if (base_committed != g_last->top_committed) { + goto sbrk_exit; + } + /* Assert postconditions */ + assert((unsigned)base_committed % g_pagesize == 0); +#ifdef TRACE + printf("Commit %p %d\n", base_committed, commit_size); +#endif + /* Adjust the regions commit top */ + g_last->top_committed = (char *)base_committed + commit_size; + } + } + /* Adjust the regions allocation top */ + g_last->top_allocated = (char *)g_last->top_allocated + allocate_size; + result = (char *)g_last->top_allocated - size; + /* Deallocation requested? */ + } else if (size < 0) { + long deallocate_size = -size; + /* As long as we have a region to release */ + while ((char *)g_last->top_allocated - deallocate_size < (char *)g_last->top_reserved - g_last->reserve_size) { + /* Get the size to release */ + long release_size = g_last->reserve_size; + /* Get the base address */ + void *base_reserved = (char *)g_last->top_reserved - release_size; + /* Assert preconditions */ + assert((unsigned)base_reserved % g_regionsize == 0); + assert(0 < release_size && release_size % g_regionsize == 0); + { + /* Release this */ + int rc = VirtualFree(base_reserved, 0, + MEM_RELEASE); + /* Check returned code for consistency */ + if (!rc) { + goto sbrk_exit; + } +#ifdef TRACE + printf("Release %p %d\n", base_reserved, release_size); +#endif + } + /* Adjust deallocation size */ + deallocate_size -= (char *)g_last->top_allocated - (char *)base_reserved; + /* Remove the old region from the list */ + if (!region_list_remove(&g_last)) { + goto sbrk_exit; + } + } + { + /* Compute the size to decommit */ + long to_decommit = (char *)g_last->top_committed - ((char *)g_last->top_allocated - deallocate_size); + if (to_decommit >= g_my_pagesize) { + /* Compute the size to decommit */ + long decommit_size = FLOOR(to_decommit, g_my_pagesize); + /* Compute the base address */ + void *base_committed = (char *)g_last->top_committed - decommit_size; + /* Assert preconditions */ + assert((unsigned)base_committed % g_pagesize == 0); + assert(0 < decommit_size && decommit_size % g_pagesize == 0); + { + /* Decommit this */ + int rc = VirtualFree((char *)base_committed, decommit_size, + MEM_DECOMMIT); + /* Check returned code for consistency */ + if (!rc) { + goto sbrk_exit; + } +#ifdef TRACE + printf("Decommit %p %d\n", base_committed, decommit_size); +#endif + } + /* Adjust deallocation size and regions commit and allocate top */ + deallocate_size -= (char *)g_last->top_allocated - (char *)base_committed; + g_last->top_committed = base_committed; + g_last->top_allocated = base_committed; + } + } + /* Adjust regions allocate top */ + g_last->top_allocated = (char *)g_last->top_allocated - deallocate_size; + /* Check for underflow */ + if ((char *)g_last->top_reserved - g_last->reserve_size > (char *)g_last->top_allocated || + g_last->top_allocated > g_last->top_committed) { + /* Adjust regions allocate top */ + g_last->top_allocated = (char *)g_last->top_reserved - g_last->reserve_size; + goto 
sbrk_exit; + } + result = g_last->top_allocated; + } + /* Assert invariants */ + assert(g_last); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && + g_last->top_allocated <= g_last->top_committed); + assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && + g_last->top_committed <= g_last->top_reserved && + (unsigned)g_last->top_committed % g_pagesize == 0); + assert((unsigned)g_last->top_reserved % g_regionsize == 0); + assert((unsigned)g_last->reserve_size % g_regionsize == 0); + +sbrk_exit: +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Release spin lock */ + slrelease(&g_sl); +#endif + return result; +} + +/* mmap for windows */ +static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg) { + static long g_pagesize; + static long g_regionsize; +#ifdef TRACE + printf("mmap %d\n", size); +#endif +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Wait for spin lock */ + slwait(&g_sl); +#endif + /* First time initialization */ + if (!g_pagesize) { + g_pagesize = getpagesize(); + } + if (!g_regionsize) { + g_regionsize = getregionsize(); + } + /* Assert preconditions */ + assert((unsigned)ptr % g_regionsize == 0); + assert(size % g_pagesize == 0); + /* Allocate this */ + ptr = VirtualAlloc(ptr, size, + MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE); + if (!ptr) { + ptr = (void *)MORECORE_FAILURE; + goto mmap_exit; + } + /* Assert postconditions */ + assert((unsigned)ptr % g_regionsize == 0); +#ifdef TRACE + printf("Commit %p %d\n", ptr, size); +#endif +mmap_exit: +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Release spin lock */ + slrelease(&g_sl); +#endif + return ptr; +} + +/* munmap for windows */ +static long munmap(void *ptr, long size) { + static long g_pagesize; + static long g_regionsize; + int rc = MUNMAP_FAILURE; +#ifdef TRACE + printf("munmap %p %d\n", ptr, size); +#endif +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Wait for spin lock */ + slwait(&g_sl); +#endif + /* First time initialization */ + if (!g_pagesize) { + g_pagesize = getpagesize(); + } + if (!g_regionsize) { + g_regionsize = getregionsize(); + } + /* Assert preconditions */ + assert((unsigned)ptr % g_regionsize == 0); + assert(size % g_pagesize == 0); + /* Free this */ + if (!VirtualFree(ptr, 0, + MEM_RELEASE)) { + goto munmap_exit; + } + rc = 0; +#ifdef TRACE + printf("Release %p %d\n", ptr, size); +#endif +munmap_exit: +#if defined(USE_MALLOC_LOCK) && defined(NEEDED) + /* Release spin lock */ + slrelease(&g_sl); +#endif + return rc; +} + +static void vminfo(CHUNK_SIZE_T *free, CHUNK_SIZE_T *reserved, CHUNK_SIZE_T *committed) { + MEMORY_BASIC_INFORMATION memory_info; + memory_info.BaseAddress = 0; + *free = *reserved = *committed = 0; + while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { + switch (memory_info.State) { + case MEM_FREE: + *free += memory_info.RegionSize; + break; + case MEM_RESERVE: + *reserved += memory_info.RegionSize; + break; + case MEM_COMMIT: + *committed += memory_info.RegionSize; + break; + } + memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; + } +} + +static int cpuinfo(int whole, CHUNK_SIZE_T *kernel, CHUNK_SIZE_T *user) { + if (whole) { + __int64 creation64, exit64, kernel64, user64; + int rc = GetProcessTimes(GetCurrentProcess(), + (FILETIME *)&creation64, + (FILETIME *)&exit64, + (FILETIME *)&kernel64, + (FILETIME *)&user64); + if (!rc) { + *kernel = 0; + *user = 0; + return 
FALSE;
+        }
+        *kernel = (CHUNK_SIZE_T)(kernel64 / 10000);
+        *user = (CHUNK_SIZE_T)(user64 / 10000);
+        return TRUE;
+    } else {
+        __int64 creation64, exit64, kernel64, user64;
+        int rc = GetThreadTimes(GetCurrentThread(),
+                                (FILETIME *)&creation64,
+                                (FILETIME *)&exit64,
+                                (FILETIME *)&kernel64,
+                                (FILETIME *)&user64);
+        if (!rc) {
+            *kernel = 0;
+            *user = 0;
+            return FALSE;
+        }
+        *kernel = (CHUNK_SIZE_T)(kernel64 / 10000);
+        *user = (CHUNK_SIZE_T)(user64 / 10000);
+        return TRUE;
+    }
+}
+
+#endif /* WIN32 */
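+
+/*
+  (Usage aside -- a sketch, not part of the original sources: building
+  with USE_DL_PREFIX renames the public entry points, which is how this
+  allocator can coexist with a system malloc, as the V2.6.6 history
+  entry below notes. For example, assuming the header is vendored as
+  dlmalloc.h:
+
+    #define USE_DL_PREFIX
+    #include "dlmalloc.h"
+
+    void *p = dlmalloc(64);  // prefixed form of malloc(64)
+    dlfree(p);               // prefixed form of free(p)
+  )
+*/
+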
+/* ------------------------------------------------------------
+History:
+  V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
+  * Fix malloc_state bitmap array misdeclaration
+
+  V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
+  * Allow tuning of FIRST_SORTED_BIN_SIZE
+  * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+  * Better detection and support for non-contiguousness of MORECORE.
+    Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+  * Bypass most of malloc if no frees. Thanks to Emery Berger.
+  * Fix freeing of old top non-contiguous chunk in sysmalloc.
+  * Raised default trim and map thresholds to 256K.
+  * Fix mmap-related #defines. Thanks to Lubos Lunak.
+  * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+  * Branch-free bin calculation
+  * Default trim and mmap thresholds now 256K.
+
+  V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
+  * Introduce independent_comalloc and independent_calloc.
+    Thanks to Michael Pachos for motivation and help.
+  * Make optional .h file available
+  * Allow > 2GB requests on 32bit systems.
+  * new WIN32 sbrk, mmap, munmap, lock code from .
+    Thanks also to Andreas Mueller,
+    and Anonymous.
+  * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+    helping test this.)
+  * memalign: check alignment arg
+  * realloc: don't try to shift chunks backwards, since this
+    leads to more fragmentation in some programs and doesn't
+    seem to help in any others.
+  * Collect all cases in malloc requiring system memory into sYSMALLOc
+  * Use mmap as backup to sbrk
+  * Place all internal state in malloc_state
+  * Introduce fastbins (although similar to 2.5.1)
+  * Many minor tunings and cosmetic improvements
+  * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+  * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+    Thanks to Tony E. Bennett and others.
+  * Include errno.h to support default failure action.
+
+  V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
+  * return null for negative arguments
+  * Added several WIN32 cleanups from Martin C. Fong
+  * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+    (e.g. WIN32 platforms)
+  * Cleanup header file inclusion for WIN32 platforms
+  * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+  * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+    memory allocation routines
+  * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+  * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+    usage of 'assert' in non-WIN32 code
+  * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+    avoid infinite loop
+  * Always call 'fREe()' rather than 'free()'
+
+  V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
+  * Fixed ordering problem with boundary-stamping
+
+  V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
+  * Added pvalloc, as recommended by H.J. Liu
+  * Added 64bit pointer support mainly from Wolfram Gloger
+  * Added anonymously donated WIN32 sbrk emulation
+  * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+  * malloc_extend_top: fix mask error that caused wastage after
+    foreign sbrks
+  * Add linux mremap support code from HJ Liu
+
+  V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
+  * Integrated most documentation with the code.
+  * Add support for mmap, with help from
+    Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+  * Use last_remainder in more cases.
+  * Pack bins using idea from colin@nyx10.cs.du.edu
+  * Use ordered bins instead of best-fit threshold
+  * Eliminate block-local decls to simplify tracing and debugging.
+  * Support another case of realloc via move into top
+  * Fix error occurring when initial sbrk_base not word-aligned.
+  * Rely on page size for units instead of SBRK_UNIT to
+    avoid surprises about sbrk alignment conventions.
+  * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+    (raymond@es.ele.tue.nl) for the suggestion.
+  * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+  * More precautions for cases where other routines call sbrk,
+    courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+  * Added macros etc., allowing use in linux libc from
+    H.J. Lu (hjl@gnu.ai.mit.edu)
+  * Inverted this history list
+
+  V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
+  * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+  * Removed all preallocation code since under current scheme
+    the work required to undo bad preallocations exceeds
+    the work saved in good cases for most test programs.
+  * No longer use return list or unconsolidated bins since
+    no scheme using them consistently outperforms those that don't
+    given above changes.
+  * Use best fit for very large chunks to prevent some worst-cases.
+  * Added some support for debugging
+
+  V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
+  * Removed footers when chunks are in use. Thanks to
+    Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+  V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
+  * Added malloc_trim, with help from Wolfram Gloger
+    (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+  V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
+
+  V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
+  * realloc: try to expand in both directions
+  * malloc: swap order of clean-bin strategy;
+  * realloc: only conditionally expand backwards
+  * Try not to scavenge used bins
+  * Use bin counts as a guide to preallocation
+  * Occasionally bin return list chunks in first scan
+  * Add a few optimizations from colin@nyx10.cs.du.edu
+
+  V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
+  * faster bin computation & slightly different binning
+  * merged all consolidations to one part of malloc proper
+    (eliminating old malloc_find_space & malloc_clean_bin)
+  * Scan 2 returns chunks (not just 1)
+  * Propagate failure in realloc if malloc returns 0
+  * Add stuff to allow compilation on non-ANSI compilers
+    from kpv@research.att.com
+
+  V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
+  * removed potential for odd address access in prev_chunk
+  * removed dependency on getpagesize.h
+  * misc cosmetics and a bit more internal documentation
+  * anticosmetics: mangled names in macros to evade debugger strangeness
+  * tested on sparc, hp-700, dec-mips, rs6000
+    with gcc & native cc (hp, dec only) allowing
+    Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+ + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + +#endif /* MALLOC_270_H */ diff --git a/vendor/rlights.h b/vendor/rlights.h index aba907b5fb..cee06360ee 100644 --- a/vendor/rlights.h +++ b/vendor/rlights.h @@ -167,4 +167,4 @@ void UpdateLightValues(Shader shader, Light light) SetShaderValue(shader, light.colorLoc, color, SHADER_UNIFORM_VEC4); } -#endif // RLIGHTS_IMPLEMENTATION \ No newline at end of file +#endif // RLIGHTS_IMPLEMENTATION From 323c42c12e6d4101063cc9d1a2258bde570ddc5a Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Mon, 27 Apr 2026 20:22:28 -0400 Subject: [PATCH 2/4] almost working --- build.sh | 24 ++-- config/impulse_wars.ini | 12 +- ocean/impulse_wars/benchmark.c | 16 +-- ocean/impulse_wars/binding.c | 204 ++++++++++-------------------- ocean/impulse_wars/binding.h | 177 -------------------------- ocean/impulse_wars/env.h | 79 +++++++----- ocean/impulse_wars/game.h | 26 ++-- ocean/impulse_wars/impulse_wars.c | 10 +- ocean/impulse_wars/map.h | 12 +- ocean/impulse_wars/render.h | 4 +- ocean/impulse_wars/types.h | 10 +- 11 files changed, 183 insertions(+), 391 deletions(-) delete mode 100644 ocean/impulse_wars/binding.h diff --git a/build.sh b/build.sh index 492c033686..6cb06a7403 100755 --- a/build.sh +++ b/build.sh @@ -75,6 +75,7 @@ CLANG_WARN=( -Wno-incompatible-pointer-types-discards-qualifiers -Wno-error=array-parameter ) +CLANG_OPT=() download() { local name=$1 url=$2 @@ -108,14 +109,20 @@ elif [ "$ENV" = "trailer" ]; then OUTPUT_NAME="trailer/trailer" elif [ "$ENV" = "impulse_wars" ]; then SRC_DIR="ocean/$ENV" - if [ "$MODE" = "web" ]; then BOX2D_NAME='box2d-web' - elif [ "$PLATFORM" = "Linux" ]; then BOX2D_NAME='box2d-linux-amd64' - else BOX2D_NAME='box2d-macos-arm64' + if [ "$MODE" = "web" ]; then + BOX2D_NAME='box2d-web' + elif [ "$PLATFORM" = "Linux" ]; then + BOX2D_NAME='box2d-linux-amd64' + else + BOX2D_NAME='box2d-macos-arm64' fi + BOX2D_URL="https://github.com/capnspacehook/box2d/releases/latest/download" download "$BOX2D_NAME" "$BOX2D_URL/$BOX2D_NAME.tar.gz" INCLUDES+=(-I./$BOX2D_NAME/include -I./$BOX2D_NAME/src) LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a") + + CLANG_OPT=(-flto -fno-math-errno -march=native) elif [ -d "ocean/$ENV" ]; then SRC_DIR="ocean/$ENV" else @@ -126,11 +133,11 @@ OUTPUT_NAME=${OUTPUT_NAME:-$ENV} # Standalone environment build if [ -n "$DEBUG" ] || [ "$MODE" = "local" ]; then - CLANG_OPT=(-g -O0 "${CLANG_WARN[@]}" "${SANITIZE_FLAGS[@]}") + CLANG_OPT+=(-g -O0 "${CLANG_WARN[@]}" "${SANITIZE_FLAGS[@]}") NVCC_OPT="-O0 -g" LINK_OPT="-g" else - CLANG_OPT=(-O2 -DNDEBUG "${CLANG_WARN[@]}") + CLANG_OPT+=(-O2 -DNDEBUG "${CLANG_WARN[@]}") NVCC_OPT="-O2 --threads 0" LINK_OPT="-O2" fi @@ -242,6 +249,7 @@ echo "Compiling static library for $ENV..." ${CC:-clang} -c "${CLANG_OPT[@]}" $EXTRA_CFLAGS \ -I. 
-Isrc -I$SRC_DIR -Ivendor \ -I./$RAYLIB_NAME/include -I$CUDA_HOME/include \ + "${INCLUDES[@]}" \ -DPLATFORM_DESKTOP \ -fno-semantic-interposition -fvisibility=hidden \ -fPIC -fopenmp \ @@ -273,7 +281,7 @@ if [ -z "$MODE" ]; then LINK_CMD=( ${CXX:-g++} -shared -fPIC -fopenmp - build/bindings.o "$STATIC_LIB" "$RAYLIB_A" + build/bindings.o "$STATIC_LIB" "${LINK_ARCHIVES[@]}" -L$CUDA_HOME/lib64 $CUDNN_LFLAG $NCCL_LFLAG "${WHEEL_RPATH_FLAGS[@]}" -lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand -lcudnn @@ -298,7 +306,7 @@ elif [ "$MODE" = "cpu" ]; then src/bindings_cpu.cpp -o build/bindings_cpu.o LINK_CMD=( ${CXX:-g++} -shared -fPIC -fopenmp - build/bindings_cpu.o "$STATIC_LIB" "$RAYLIB_A" + build/bindings_cpu.o "$STATIC_LIB" "${LINK_ARCHIVES[@]}" -lm -lpthread $OMP_LIB $LINK_OPT "${SHARED_LDFLAGS[@]}" -o "$OUTPUT" @@ -317,7 +325,7 @@ elif [ "$MODE" = "profile" ]; then $PRECISION \ -Xcompiler=-fopenmp \ tests/profile_kernels.cu vendor/ini.c \ - "$STATIC_LIB" "$RAYLIB_A" \ + "$STATIC_LIB" "${LINK_ARCHIVES[@]}" \ -lnccl -lnvidia-ml -lcublas -lcurand -lcudnn \ -lGL -lm -lpthread $OMP_LIB \ -o profile diff --git a/config/impulse_wars.ini b/config/impulse_wars.ini index 3e7c7f7bbb..772c08306f 100644 --- a/config/impulse_wars.ini +++ b/config/impulse_wars.ini @@ -26,6 +26,17 @@ sitting_duck = False continuous = False is_training = True +reward_win = 2.0 +reward_self_kill = -1.0 +reward_enemy_death = 1.0 +reward_enemy_kill = 1.0 +reward_death = 0.0 +reward_energy_emptied = -0.75 +reward_weapon_pickup = 0.5 +reward_shield_break = 0.5 +reward_shot_hit_coef = 0.005 +reward_explosion_hit_coef = 0.005 + [train] total_timesteps = 1_000_000_000 checkpoint_interval = 250 @@ -36,7 +47,6 @@ compile = False compile_mode = reduce-overhead compile_fullgraph = False - [sweep] downsample = 10 max_cost = 900 diff --git a/ocean/impulse_wars/benchmark.c b/ocean/impulse_wars/benchmark.c index 3071bf91b3..c11c1e2050 100644 --- a/ocean/impulse_wars/benchmark.c +++ b/ocean/impulse_wars/benchmark.c @@ -1,16 +1,16 @@ #include "env.h" void randActions(iwEnv *e) { - // e->lastRandState = e->randState; + // e->lastRandState = e->rng; uint8_t actionOffset = 0; for (uint8_t i = 0; i < e->numDrones; i++) { - e->actions[actionOffset + 0] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 1] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 2] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 3] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 4] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 5] = randFloat(&e->randState, -1.0f, 1.0f); - e->actions[actionOffset + 6] = randFloat(&e->randState, -1.0f, 1.0f); + e->actions[actionOffset + 0] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 1] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 2] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 3] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 4] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 5] = randFloat(&e->rng, -1.0f, 1.0f); + e->actions[actionOffset + 6] = randFloat(&e->rng, -1.0f, 1.0f); actionOffset += CONTINUOUS_ACTION_SIZE; } diff --git a/ocean/impulse_wars/binding.c b/ocean/impulse_wars/binding.c index 28b429773b..170421963b 100644 --- a/ocean/impulse_wars/binding.c +++ b/ocean/impulse_wars/binding.c @@ -1,121 +1,51 @@ -#include - #include "env.h" -static PyObject *get_consts(PyObject *self, PyObject *args); +#define OBS_SIZE 998 // for 2 drones 
(players) +// actions: +// 9: move, noop + 8 directions +// 17: aim, noop + 16 directions +// 2: shoot or not +// 2: brake or not +// 2: burst or not +#define NUM_ATNS 5 +#define ACT_SIZES {9, 17, 2, 2, 2} +#define OBS_TENSOR_T FloatTensor #define Env iwEnv -#define MY_SHARED -#define MY_METHODS {"get_consts", get_consts, METH_VARARGS, "Get constants"} - -#include "../env_binding.h" - -#define setDictVal(dict, key, val) \ - if (PyDict_SetItemString(dict, key, PyLong_FromLong(val)) < 0) { \ - PyErr_SetString(PyExc_RuntimeError, "Failed to set " key " in dict"); \ - return NULL; \ - } - -static PyObject *get_consts(PyObject *self, PyObject *args) { - PyObject *dronesArg = PyTuple_GetItem(args, 0); - if (!PyObject_TypeCheck(dronesArg, &PyLong_Type)) { - PyErr_SetString(PyExc_TypeError, "num_drones must be an integer"); - return NULL; - } - const uint8_t numDrones = (uint8_t)PyLong_AsLong(dronesArg); - - PyObject *dict = PyDict_New(); - if (PyErr_Occurred()) { - return NULL; - } - - const uint16_t droneObsOffset = ENEMY_DRONE_OBS_OFFSET + ((numDrones - 1) * ENEMY_DRONE_OBS_SIZE); - - setDictVal(dict, "obsBytes", obsBytes(numDrones)); - setDictVal(dict, "mapObsSize", MAP_OBS_SIZE); - setDictVal(dict, "discreteObsSize", discreteObsSize(numDrones)); - setDictVal(dict, "continuousObsSize", continuousObsSize(numDrones)); - setDictVal(dict, "continuousObsBytes", continuousObsSize(numDrones) * sizeof(float)); - setDictVal(dict, "wallTypes", NUM_WALL_TYPES); - setDictVal(dict, "weaponTypes", NUM_WEAPONS + 1); - setDictVal(dict, "mapObsRows", MAP_OBS_ROWS); - setDictVal(dict, "mapObsColumns", MAP_OBS_COLUMNS); - setDictVal(dict, "continuousObsOffset", alignedSize(MAP_OBS_SIZE, sizeof(float))); - setDictVal(dict, "numNearWallObs", NUM_NEAR_WALL_OBS); - setDictVal(dict, "nearWallTypesObsOffset", NEAR_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "nearWallPosObsSize", NEAR_WALL_POS_OBS_SIZE); - setDictVal(dict, "nearWallObsSize", NEAR_WALL_OBS_SIZE); - setDictVal(dict, "nearWallPosObsOffset", NEAR_WALL_POS_OBS_OFFSET); - setDictVal(dict, "numFloatingWallObs", NUM_FLOATING_WALL_OBS); - setDictVal(dict, "floatingWallTypesObsOffset", FLOATING_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "floatingWallInfoObsSize", FLOATING_WALL_INFO_OBS_SIZE); - setDictVal(dict, "floatingWallObsSize", FLOATING_WALL_OBS_SIZE); - setDictVal(dict, "floatingWallInfoObsOffset", FLOATING_WALL_INFO_OBS_OFFSET); - setDictVal(dict, "numWeaponPickupObs", NUM_WEAPON_PICKUP_OBS); - setDictVal(dict, "weaponPickupTypesObsOffset", WEAPON_PICKUP_WEAPONS_OBS_OFFSET); - setDictVal(dict, "weaponPickupPosObsSize", WEAPON_PICKUP_POS_OBS_SIZE); - setDictVal(dict, "weaponPickupObsSize", WEAPON_PICKUP_OBS_SIZE); - setDictVal(dict, "weaponPickupPosObsOffset", WEAPON_PICKUP_POS_OBS_OFFSET); - setDictVal(dict, "numProjectileObs", NUM_PROJECTILE_OBS); - setDictVal(dict, "projectileDroneObsOffset", PROJECTILE_DRONE_OBS_OFFSET); - setDictVal(dict, "projectileTypesObsOffset", PROJECTILE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "projectileInfoObsSize", PROJECTILE_INFO_OBS_SIZE); - setDictVal(dict, "projectileObsSize", PROJECTILE_OBS_SIZE); - setDictVal(dict, "projectileInfoObsOffset", PROJECTILE_INFO_OBS_OFFSET); - setDictVal(dict, "enemyDroneWeaponsObsOffset", ENEMY_DRONE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsOffset", ENEMY_DRONE_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsSize", ENEMY_DRONE_OBS_SIZE); - setDictVal(dict, "droneObsOffset", droneObsOffset); - setDictVal(dict, "droneObsSize", DRONE_OBS_SIZE); - setDictVal(dict, "miscObsSize", 
MISC_OBS_SIZE); - setDictVal(dict, "miscObsOffset", droneObsOffset + DRONE_OBS_SIZE); +#include "vecenv.h" - setDictVal(dict, "maxDrones", MAX_DRONES); - setDictVal(dict, "contActionsSize", CONTINUOUS_ACTION_SIZE); +#define DICTGET(key) dict_get(kwargs, key)->value - return dict; -} - -static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { - VecEnv *ve = unpack_vecenv(args); - initMaps(ve->envs[0]); - - for (uint16_t i = 0; i < ve->num_envs; i++) { - iwEnv *e = (iwEnv *)ve->envs[i]; - setupEnv(e); - } - - return Py_None; -} - -static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { +void my_init(Env* env, Dict* kwargs) { initEnv( - e, - (uint8_t)unpack(kwargs, "num_drones"), - (uint8_t)unpack(kwargs, "num_agents"), - (int8_t)unpack(kwargs, "map_idx"), - (uint64_t)unpack(kwargs, "seed"), - (bool)unpack(kwargs, "enable_teams"), - (bool)unpack(kwargs, "sitting_duck"), - (bool)unpack(kwargs, "is_training"), - (bool)unpack(kwargs, "continuous") + env, + 2, + 1, + -1, + 0, + (bool)DICTGET("enable_teams"), + (bool)DICTGET("sitting_duck"), + (bool)DICTGET("is_training"), + (bool)DICTGET("continuous") ); + setRewards( - e, - (float)unpack(kwargs, "reward_win"), - (float)unpack(kwargs, "reward_self_kill"), - (float)unpack(kwargs, "reward_enemy_death"), - (float)unpack(kwargs, "reward_enemy_kill"), + env, + (float)DICTGET("reward_win"), + (float)DICTGET("reward_self_kill"), + (float)DICTGET("reward_enemy_death"), + (float)DICTGET("reward_enemy_kill"), 0.0f, // teammate death punishment 0.0f, // teammate kill punishment - (float)unpack(kwargs, "reward_death"), - (float)unpack(kwargs, "reward_energy_emptied"), - (float)unpack(kwargs, "reward_weapon_pickup"), - (float)unpack(kwargs, "reward_shield_break"), - (float)unpack(kwargs, "reward_shot_hit_coef"), - (float)unpack(kwargs, "reward_explosion_hit_coef") + (float)DICTGET("reward_death"), + (float)DICTGET("reward_energy_emptied"), + (float)DICTGET("reward_weapon_pickup"), + (float)DICTGET("reward_shield_break"), + (float)DICTGET("reward_shot_hit_coef"), + (float)DICTGET("reward_explosion_hit_coef") ); - return 0; + + initMaps(env); } #define _LOG_BUF_SIZE 128 @@ -130,48 +60,46 @@ char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, cons return buf; } -static int my_log(PyObject *dict, Log *log) { - assign_to_dict(dict, "episode_length", log->length); - assign_to_dict(dict, "ties", log->ties); +void my_log(Log *log, Dict *out) { + dict_set(out, "episode_length", log->length); + dict_set(out, "ties", log->ties); - assign_to_dict(dict, "perf", log->stats[0].wins); - assign_to_dict(dict, "score", log->stats[0].wins); + dict_set(out, "perf", log->stats[0].wins); + dict_set(out, "score", log->stats[0].wins); char buf[_LOG_BUF_SIZE] = {0}; for (uint8_t i = 0; i < MAX_DRONES; i++) { - assign_to_dict(dict, droneLog(buf, i, "returns"), log->stats[i].returns); - assign_to_dict(dict, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); - assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); - assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); - assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); - assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); - assign_to_dict(dict, droneLog(buf, 
i, "own_shield_broken"), log->stats[i].ownShieldBroken); - assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); - assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); - assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); - assign_to_dict(dict, droneLog(buf, i, "wins"), log->stats[i].wins); + dict_set(out, droneLog(buf, i, "returns"), log->stats[i].returns); + dict_set(out, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); + dict_set(out, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); + dict_set(out, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); + dict_set(out, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); + dict_set(out, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); + dict_set(out, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); + dict_set(out, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); + dict_set(out, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); + dict_set(out, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); + dict_set(out, droneLog(buf, i, "kills"), log->stats[i].kills); + dict_set(out, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); + dict_set(out, droneLog(buf, i, "wins"), log->stats[i].wins); // useful for debugging weapon balance, but really slows down // sweeps due to adding a ton of extra logging data // // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); + // dict_set(out, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); + // dict_set(out, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); + // dict_set(out, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); + // dict_set(out, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); + // dict_set(out, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); + // dict_set(out, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); // } - assign_to_dict(dict, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); - assign_to_dict(dict, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); - assign_to_dict(dict, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); - assign_to_dict(dict, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); + dict_set(out, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); + dict_set(out, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); + dict_set(out, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); + dict_set(out, droneLog(buf, i, "total_own_shots_taken"), 
log->stats[i].totalOwnShotsTaken); + dict_set(out, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); + dict_set(out, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); } - - return 0; } diff --git a/ocean/impulse_wars/binding.h b/ocean/impulse_wars/binding.h deleted file mode 100644 index 28b429773b..0000000000 --- a/ocean/impulse_wars/binding.h +++ /dev/null @@ -1,177 +0,0 @@ -#include - -#include "env.h" - -static PyObject *get_consts(PyObject *self, PyObject *args); - -#define Env iwEnv -#define MY_SHARED -#define MY_METHODS {"get_consts", get_consts, METH_VARARGS, "Get constants"} - -#include "../env_binding.h" - -#define setDictVal(dict, key, val) \ - if (PyDict_SetItemString(dict, key, PyLong_FromLong(val)) < 0) { \ - PyErr_SetString(PyExc_RuntimeError, "Failed to set " key " in dict"); \ - return NULL; \ - } - -static PyObject *get_consts(PyObject *self, PyObject *args) { - PyObject *dronesArg = PyTuple_GetItem(args, 0); - if (!PyObject_TypeCheck(dronesArg, &PyLong_Type)) { - PyErr_SetString(PyExc_TypeError, "num_drones must be an integer"); - return NULL; - } - const uint8_t numDrones = (uint8_t)PyLong_AsLong(dronesArg); - - PyObject *dict = PyDict_New(); - if (PyErr_Occurred()) { - return NULL; - } - - const uint16_t droneObsOffset = ENEMY_DRONE_OBS_OFFSET + ((numDrones - 1) * ENEMY_DRONE_OBS_SIZE); - - setDictVal(dict, "obsBytes", obsBytes(numDrones)); - setDictVal(dict, "mapObsSize", MAP_OBS_SIZE); - setDictVal(dict, "discreteObsSize", discreteObsSize(numDrones)); - setDictVal(dict, "continuousObsSize", continuousObsSize(numDrones)); - setDictVal(dict, "continuousObsBytes", continuousObsSize(numDrones) * sizeof(float)); - setDictVal(dict, "wallTypes", NUM_WALL_TYPES); - setDictVal(dict, "weaponTypes", NUM_WEAPONS + 1); - setDictVal(dict, "mapObsRows", MAP_OBS_ROWS); - setDictVal(dict, "mapObsColumns", MAP_OBS_COLUMNS); - setDictVal(dict, "continuousObsOffset", alignedSize(MAP_OBS_SIZE, sizeof(float))); - setDictVal(dict, "numNearWallObs", NUM_NEAR_WALL_OBS); - setDictVal(dict, "nearWallTypesObsOffset", NEAR_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "nearWallPosObsSize", NEAR_WALL_POS_OBS_SIZE); - setDictVal(dict, "nearWallObsSize", NEAR_WALL_OBS_SIZE); - setDictVal(dict, "nearWallPosObsOffset", NEAR_WALL_POS_OBS_OFFSET); - setDictVal(dict, "numFloatingWallObs", NUM_FLOATING_WALL_OBS); - setDictVal(dict, "floatingWallTypesObsOffset", FLOATING_WALL_TYPES_OBS_OFFSET); - setDictVal(dict, "floatingWallInfoObsSize", FLOATING_WALL_INFO_OBS_SIZE); - setDictVal(dict, "floatingWallObsSize", FLOATING_WALL_OBS_SIZE); - setDictVal(dict, "floatingWallInfoObsOffset", FLOATING_WALL_INFO_OBS_OFFSET); - setDictVal(dict, "numWeaponPickupObs", NUM_WEAPON_PICKUP_OBS); - setDictVal(dict, "weaponPickupTypesObsOffset", WEAPON_PICKUP_WEAPONS_OBS_OFFSET); - setDictVal(dict, "weaponPickupPosObsSize", WEAPON_PICKUP_POS_OBS_SIZE); - setDictVal(dict, "weaponPickupObsSize", WEAPON_PICKUP_OBS_SIZE); - setDictVal(dict, "weaponPickupPosObsOffset", WEAPON_PICKUP_POS_OBS_OFFSET); - setDictVal(dict, "numProjectileObs", NUM_PROJECTILE_OBS); - setDictVal(dict, "projectileDroneObsOffset", PROJECTILE_DRONE_OBS_OFFSET); - setDictVal(dict, "projectileTypesObsOffset", PROJECTILE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "projectileInfoObsSize", PROJECTILE_INFO_OBS_SIZE); - setDictVal(dict, "projectileObsSize", PROJECTILE_OBS_SIZE); - setDictVal(dict, "projectileInfoObsOffset", PROJECTILE_INFO_OBS_OFFSET); - setDictVal(dict, "enemyDroneWeaponsObsOffset", 
ENEMY_DRONE_WEAPONS_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsOffset", ENEMY_DRONE_OBS_OFFSET); - setDictVal(dict, "enemyDroneObsSize", ENEMY_DRONE_OBS_SIZE); - setDictVal(dict, "droneObsOffset", droneObsOffset); - setDictVal(dict, "droneObsSize", DRONE_OBS_SIZE); - setDictVal(dict, "miscObsSize", MISC_OBS_SIZE); - setDictVal(dict, "miscObsOffset", droneObsOffset + DRONE_OBS_SIZE); - - setDictVal(dict, "maxDrones", MAX_DRONES); - setDictVal(dict, "contActionsSize", CONTINUOUS_ACTION_SIZE); - - return dict; -} - -static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { - VecEnv *ve = unpack_vecenv(args); - initMaps(ve->envs[0]); - - for (uint16_t i = 0; i < ve->num_envs; i++) { - iwEnv *e = (iwEnv *)ve->envs[i]; - setupEnv(e); - } - - return Py_None; -} - -static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { - initEnv( - e, - (uint8_t)unpack(kwargs, "num_drones"), - (uint8_t)unpack(kwargs, "num_agents"), - (int8_t)unpack(kwargs, "map_idx"), - (uint64_t)unpack(kwargs, "seed"), - (bool)unpack(kwargs, "enable_teams"), - (bool)unpack(kwargs, "sitting_duck"), - (bool)unpack(kwargs, "is_training"), - (bool)unpack(kwargs, "continuous") - ); - setRewards( - e, - (float)unpack(kwargs, "reward_win"), - (float)unpack(kwargs, "reward_self_kill"), - (float)unpack(kwargs, "reward_enemy_death"), - (float)unpack(kwargs, "reward_enemy_kill"), - 0.0f, // teammate death punishment - 0.0f, // teammate kill punishment - (float)unpack(kwargs, "reward_death"), - (float)unpack(kwargs, "reward_energy_emptied"), - (float)unpack(kwargs, "reward_weapon_pickup"), - (float)unpack(kwargs, "reward_shield_break"), - (float)unpack(kwargs, "reward_shot_hit_coef"), - (float)unpack(kwargs, "reward_explosion_hit_coef") - ); - return 0; -} - -#define _LOG_BUF_SIZE 128 - -char *droneLog(char *buf, const uint8_t droneIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s", droneIdx, name); - return buf; -} - -char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s_%s", droneIdx, weaponNames[weaponIdx], name); - return buf; -} - -static int my_log(PyObject *dict, Log *log) { - assign_to_dict(dict, "episode_length", log->length); - assign_to_dict(dict, "ties", log->ties); - - assign_to_dict(dict, "perf", log->stats[0].wins); - assign_to_dict(dict, "score", log->stats[0].wins); - - char buf[_LOG_BUF_SIZE] = {0}; - for (uint8_t i = 0; i < MAX_DRONES; i++) { - assign_to_dict(dict, droneLog(buf, i, "returns"), log->stats[i].returns); - assign_to_dict(dict, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); - assign_to_dict(dict, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); - assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); - assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); - assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); - assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); - assign_to_dict(dict, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); - assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); - assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); - assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); - assign_to_dict(dict, 
droneLog(buf, i, "wins"), log->stats[i].wins); - - // useful for debugging weapon balance, but really slows down - // sweeps due to adding a ton of extra logging data - // - // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); - // assign_to_dict(dict, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); - // } - - assign_to_dict(dict, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); - assign_to_dict(dict, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); - assign_to_dict(dict, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); - assign_to_dict(dict, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); - assign_to_dict(dict, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); - } - - return 0; -} diff --git a/ocean/impulse_wars/env.h b/ocean/impulse_wars/env.h index 2162b50144..3fc90be472 100644 --- a/ocean/impulse_wars/env.h +++ b/ocean/impulse_wars/env.h @@ -292,7 +292,7 @@ void computeNearObs(iwEnv *e, const droneEntity *drone, const uint16_t discreteO } void computeObs(iwEnv *e) { - for (uint8_t agentIdx = 0; agentIdx < e->numAgents; agentIdx++) { + for (uint8_t agentIdx = 0; agentIdx < e->num_agents; agentIdx++) { droneEntity *agentDrone = safe_array_get_at(e->drones, agentIdx); // if the drone is dead, only compute observations if it died // this step and it isn't out of bounds @@ -478,7 +478,7 @@ void setupEnv(iwEnv *e) { if (!e->isTraining) { firstMap = 1; } - mapIdx = randInt(&e->randState, firstMap, NUM_MAPS - 1); + mapIdx = randInt(&e->rng, firstMap, NUM_MAPS - 1); } DEBUG_LOGF("setting up map %d", mapIdx); setupMap(e, mapIdx); @@ -493,7 +493,7 @@ void setupEnv(iwEnv *e) { DEBUG_LOG("creating weapon pickups"); // start spawning pickups in a random quadrant - e->lastSpawnQuad = randInt(&e->randState, 0, 3); + e->lastSpawnQuad = randInt(&e->rng, 0, 3); for (uint8_t i = 0; i < maps[mapIdx]->weaponPickups; i++) { createWeaponPickup(e); } @@ -530,7 +530,7 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui DEBUG_LOGF("seed: %lu", seed); e->numDrones = numDrones; - e->numAgents = numAgents; + e->num_agents = numAgents; e->teamsEnabled = enableTeams; e->numTeams = numDrones; if (e->teamsEnabled) { @@ -557,11 +557,10 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui e->continuousActions = continuousActions; - // TODO: remove when puffer bindings add truncations - e->truncations = fastCalloc(numDrones, sizeof(uint8_t)); + //e->truncations = fastCalloc(numDrones, sizeof(uint8_t)); setEnvFrameRate(e); - e->randState = seed; + e->rng = seed; e->needsReset = false; b2WorldDef worldDef = b2DefaultWorldDef(); @@ -620,9 +619,9 @@ void setRewards(iwEnv *e, float winReward, float selfKillPunishment, float enemy void clearEnv(iwEnv *e) { // rewards get cleared in stepEnv every step - // memset(e->masks, 1, e->numAgents * sizeof(uint8_t)); - 
memset(e->terminals, 0x0, e->numAgents * sizeof(uint8_t));
-    memset(e->truncations, 0x0, e->numAgents * sizeof(uint8_t));
+    // memset(e->masks, 1, e->num_agents * sizeof(uint8_t));
+    memset(e->terminals, 0.0f, e->num_agents * sizeof(uint8_t));
+    //memset(e->truncations, 0x0, e->num_agents * sizeof(uint8_t));
 
     e->episodeLength = 0;
     memset(e->stats, 0x0, sizeof(e->stats));
@@ -749,7 +748,7 @@ float computeReward(iwEnv *e, droneEntity *drone) {
         reward += e->shieldBreakReward;
     }
 
-    if (e->numAgents == e->numDrones) {
+    if (e->num_agents == e->numDrones) {
         if (drone->stepInfo.shotTaken[i] != 0) {
             reward -= drone->stepInfo.shotTaken[i] * e->shotHitRewardCoef;
         }
@@ -791,7 +790,7 @@ float computeReward(iwEnv *e, droneEntity *drone) {
 const float REWARD_EPS = 1.0e-6f;
 
 void computeRewards(iwEnv *e, const bool roundOver, const int8_t winner, const int8_t winningTeam) {
-    if (roundOver && winner != -1 && winner < e->numAgents) {
+    if (roundOver && winner != -1 && winner < e->num_agents) {
         e->rewards[winner] += e->winReward;
     }
@@ -807,7 +806,7 @@ void computeRewards(iwEnv *e, const bool roundOver, const int8_t winner, const i
                 reward += e->selfKillPunishment;
             }
         }
-        if (i < e->numAgents) {
+        if (i < e->num_agents) {
             e->rewards[i] += reward;
         }
         e->stats[i].returns += reward;
@@ -821,23 +820,39 @@ static inline bool isActionNoop(const b2Vec2 action) {
 agentActions _computeActions(iwEnv *e, droneEntity *drone, const agentActions *manualActions) {
     agentActions actions = {0};
 
-    const uint8_t offset = drone->idx * CONTINUOUS_ACTION_SIZE;
     if (manualActions == NULL) {
-        actions.move = (b2Vec2){.x = e->actions[offset + 0], .y = e->actions[offset + 1]};
-        actions.aim = (b2Vec2){.x = e->actions[offset + 2], .y = e->actions[offset + 3]};
+        float (*envActions)[7] = (float(*)[7])e->actions;
+
+        uint8_t move = envActions[drone->idx][0];
+        // 0 is no-op for both move and aim
+        ASSERT(move <= 8);
+        if (move != 0) {
+            move--;
+            actions.move.x = discMoveToContMoveMap[0][move];
+            actions.move.y = discMoveToContMoveMap[1][move];
+        }
+        // aim is the second discrete action component, not the first
+        uint8_t aim = envActions[drone->idx][1];
+        ASSERT(aim <= 16);
+        if (aim != 0) {
+            aim--;
+            actions.aim.x = discAimToContAimMap[0][aim];
+            actions.aim.y = discAimToContAimMap[1][aim];
+        }
+
         if (e->continuousActions) {
            actions.move.x = tanhf(actions.move.x);
            actions.move.y = tanhf(actions.move.y);
            actions.aim.x = tanhf(actions.aim.x);
            actions.aim.y = tanhf(actions.aim.y);
        }
-        actions.chargingWeapon = e->actions[offset + 4] > 0.0f;
+
+        actions.chargingWeapon = envActions[drone->idx][4] > 0.0f;
         actions.shoot = actions.chargingWeapon;
         if (!actions.chargingWeapon && drone->chargingWeapon) {
             actions.shoot = true;
         }
-        actions.brake = e->actions[offset + 5] > 0.0f;
-        actions.chargingBurst = e->actions[offset + 6] > 0.0f;
+        actions.brake = envActions[drone->idx][5] > 0.0f;
+        actions.chargingBurst = envActions[drone->idx][6] > 0.0f;
     } else {
         actions.move = manualActions->move;
         actions.aim = manualActions->aim;
@@ -1065,7 +1080,7 @@ void stepEnv(iwEnv *e) {
             continue;
         }
 
-        if (i < e->numAgents) {
+        if (i < e->num_agents) {
             stepActions[i] = computeActions(e, drone, NULL);
         } else {
             const agentActions scriptedActions = scriptedAgentActions(e, drone);
@@ -1074,7 +1089,7 @@ void stepEnv(iwEnv *e) {
     }
 
     // reset reward buffer
-    memset(e->rewards, 0x0, e->numAgents * sizeof(float));
+    memset(e->rewards, 0x0, e->num_agents * sizeof(float));
 
     for (int i = 0; i < e->frameSkip; i++) {
 #ifdef __EMSCRIPTEN__
@@ -1154,7 +1169,7 @@ void stepEnv(iwEnv *e) {
 
     // handle sudden death
     e->stepsLeft = max(e->stepsLeft - 1, 0);
-    if 
((!e->isTraining || e->numDrones == e->numAgents) && e->stepsLeft == 0) { + if ((!e->isTraining || e->numDrones == e->num_agents) && e->stepsLeft == 0) { e->suddenDeathSteps = max(e->suddenDeathSteps - 1, 0); if (e->suddenDeathSteps == 0) { DEBUG_LOG("placing sudden death walls"); @@ -1190,9 +1205,9 @@ void stepEnv(iwEnv *e) { } } else { deadDrones++; - if (i < e->numAgents) { + if (i < e->num_agents) { if (drone->diedThisStep) { - e->terminals[i] = 1; + e->terminals[i] = 1.0f; } // else { // e->masks[i] = 0; @@ -1212,7 +1227,7 @@ void stepEnv(iwEnv *e) { } // if the enemy drone(s) are scripted don't enable sudden death // so that the agent has to work for victories - if (e->isTraining && e->numDrones != e->numAgents && e->stepsLeft == 0) { + if (e->isTraining && e->numDrones != e->num_agents && e->stepsLeft == 0) { roundOver = true; lastAliveTeam = -1; } @@ -1226,13 +1241,13 @@ void stepEnv(iwEnv *e) { } if (roundOver) { - if (e->numDrones != e->numAgents && e->stepsLeft == 0) { - DEBUG_LOG("truncating episode"); - memset(e->truncations, 1, e->numAgents * sizeof(uint8_t)); - } else { - DEBUG_LOG("terminating episode"); - memset(e->terminals, 1, e->numAgents * sizeof(uint8_t)); - } + // if (e->numDrones != e->num_agents && e->stepsLeft == 0) { + // DEBUG_LOG("truncating episode"); + // memset(e->truncations, 1, e->num_agents * sizeof(uint8_t)); + // } + + DEBUG_LOG("terminating episode"); + memset(e->terminals, 1.0f, e->num_agents * sizeof(float)); Log log = {0}; log.length = e->episodeLength; diff --git a/ocean/impulse_wars/game.h b/ocean/impulse_wars/game.h index 65e76f12e5..c47539f906 100644 --- a/ocean/impulse_wars/game.h +++ b/ocean/impulse_wars/game.h @@ -336,14 +336,14 @@ bool findOpenPos(iwEnv *e, const enum shapeCategory shapeType, b2Vec2 *emptyPos, uint16_t cellIdx; if (quad == -1) { - cellIdx = randInt(&e->randState, 0, nCells); + cellIdx = randInt(&e->rng, 0, nCells); } else { const float minX = e->map->spawnQuads[quad].min.x; const float minY = e->map->spawnQuads[quad].min.y; const float maxX = e->map->spawnQuads[quad].max.x; const float maxY = e->map->spawnQuads[quad].max.y; - b2Vec2 randPos = {.x = randFloat(&e->randState, minX, maxX), .y = randFloat(&e->randState, minY, maxY)}; + b2Vec2 randPos = {.x = randFloat(&e->rng, minX, maxX), .y = randFloat(&e->rng, minY, maxY)}; cellIdx = entityPosToCellIdx(e, randPos); } if (bitTest(checkedCells, cellIdx)) { @@ -551,7 +551,7 @@ enum weaponType randWeaponPickupType(iwEnv *e) { totalWeight += spawnWeights[i - 1]; } - const float randPick = randFloat(&e->randState, 0.0f, totalWeight); + const float randPick = randFloat(&e->rng, 0.0f, totalWeight); float cumulativeWeight = 0.0f; enum weaponType type = STANDARD_WEAPON; for (uint8_t i = 1; i < NUM_WEAPONS; i++) { @@ -718,7 +718,7 @@ void createDrone(iwEnv *e, const uint8_t idx) { // doing this while training will result in much slower learning // due to drones starting much farther apart if (e->lastSpawnQuad == -1) { - spawnQuad = randInt(&e->randState, 0, 3); + spawnQuad = randInt(&e->rng, 0, 3); } else if (e->numDrones == 2) { spawnQuad = 3 - e->lastSpawnQuad; } else { @@ -787,10 +787,10 @@ void droneAddEnergy(droneEntity *drone, float energy) { } void createDronePiece(iwEnv *e, droneEntity *drone, const bool fromShield) { - const float distance = randFloat(&e->randState, DRONE_PIECE_MIN_DISTANCE, DRONE_PIECE_MAX_DISTANCE); - const b2Vec2 direction = {.x = randFloat(&e->randState, -1.0f, 1.0f), .y = randFloat(&e->randState, -1.0f, 1.0f)}; + const float distance = randFloat(&e->rng, 
DRONE_PIECE_MIN_DISTANCE, DRONE_PIECE_MAX_DISTANCE); + const b2Vec2 direction = {.x = randFloat(&e->rng, -1.0f, 1.0f), .y = randFloat(&e->rng, -1.0f, 1.0f)}; const b2Vec2 pos = b2MulAdd(drone->pos, distance, direction); - const b2Rot rot = b2MakeRot(randFloat(&e->randState, -PI, PI)); + const b2Rot rot = b2MakeRot(randFloat(&e->rng, -PI, PI)); dronePieceEntity *piece = fastCalloc(1, sizeof(dronePieceEntity)); piece->droneIdx = drone->idx; @@ -810,9 +810,9 @@ void createDronePiece(iwEnv *e, droneEntity *drone, const bool fromShield) { pieceBodyDef.linearDamping = DRONE_PIECE_LINEAR_DAMPING; pieceBodyDef.angularDamping = DRONE_PIECE_ANGULAR_DAMPING; const float bonus = 1.0f + min(b2Length(drone->velocity) / 15.0f, 5.0f); - const float speed = randFloat(&e->randState, DRONE_PIECE_MIN_SPEED, DRONE_PIECE_MAX_SPEED) * bonus; + const float speed = randFloat(&e->rng, DRONE_PIECE_MIN_SPEED, DRONE_PIECE_MAX_SPEED) * bonus; pieceBodyDef.linearVelocity = b2MulSV(speed, direction); - pieceBodyDef.angularVelocity = randFloat(&e->randState, -PI, PI); + pieceBodyDef.angularVelocity = randFloat(&e->rng, -PI, PI); pieceBodyDef.userData = ent; piece->bodyID = b2CreateBody(e->worldID, &pieceBodyDef); @@ -1145,8 +1145,8 @@ void createProjectile(iwEnv *e, droneEntity *drone, const b2Vec2 normAim) { b2Vec2 forwardVel = b2MulSV(b2Dot(drone->velocity, normAim), normAim); b2Vec2 lateralVel = b2Sub(drone->velocity, forwardVel); lateralVel = b2MulSV(projectileShapeDef.density * DRONE_MOVE_AIM_COEF, lateralVel); - b2Vec2 aim = weaponAdjustAim(&e->randState, drone->weaponInfo->type, drone->heat, normAim); - b2Vec2 fire = b2MulAdd(lateralVel, weaponFire(&e->randState, drone->weaponInfo->type), aim); + b2Vec2 aim = weaponAdjustAim(&e->rng, drone->weaponInfo->type, drone->heat, normAim); + b2Vec2 fire = b2MulAdd(lateralVel, weaponFire(&e->rng, drone->weaponInfo->type), aim); b2Body_ApplyLinearImpulseToCenter(projectileBodyID, fire, true); projectileEntity *projectile = fastCalloc(1, sizeof(projectileEntity)); @@ -1387,8 +1387,8 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { // if the direction is zero, the magnitude cannot be calculated // correctly so set the direction randomly if (b2VecEqual(direction, b2Vec2_zero)) { - direction.x = randFloat(&ctx->e->randState, -1.0f, 1.0f); - direction.y = randFloat(&ctx->e->randState, -1.0f, 1.0f); + direction.x = randFloat(&ctx->e->rng, -1.0f, 1.0f); + direction.y = randFloat(&ctx->e->rng, -1.0f, 1.0f); direction = b2Normalize(direction); } diff --git a/ocean/impulse_wars/impulse_wars.c b/ocean/impulse_wars/impulse_wars.c index 1dfbd1d5c3..26db5ae5a6 100644 --- a/ocean/impulse_wars/impulse_wars.c +++ b/ocean/impulse_wars/impulse_wars.c @@ -16,9 +16,9 @@ int main(void) { posix_memalign((void **)&e->observations, sizeof(void *), alignedSize(NUM_DRONES * obsBytes(NUM_DRONES), sizeof(float))); e->rewards = fastCalloc(NUM_DRONES, sizeof(float)); e->actions = fastCalloc(NUM_DRONES * CONTINUOUS_ACTION_SIZE, sizeof(float)); - e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); - e->terminals = fastCalloc(NUM_DRONES, sizeof(uint8_t)); - e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + //e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + e->terminals = fastCalloc(NUM_DRONES, sizeof(float)); + //e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); rayClient *client = createRayClient(); e->client = client; @@ -41,9 +41,9 @@ int main(void) { free(e->observations); fastFree(e->actions); fastFree(e->rewards); - fastFree(e->masks); + //fastFree(e->masks); 
fastFree(e->terminals); - fastFree(e->truncations); + //fastFree(e->truncations); fastFree(e); destroyRayClient(client); #endif diff --git a/ocean/impulse_wars/map.h b/ocean/impulse_wars/map.h index eb5f2c7026..3e3c3f7c50 100644 --- a/ocean/impulse_wars/map.h +++ b/ocean/impulse_wars/map.h @@ -450,8 +450,8 @@ void setupMap(iwEnv *e, const uint8_t mapIdx) { e->mapIdx = mapIdx; e->map = maps[mapIdx]; e->defaultWeapon = weaponInfos[maps[mapIdx]->defaultWeapon]; - if (e->isTraining && randFloat(&e->randState, 0.0f, 1.0f) < 0.25f) { - e->defaultWeapon = weaponInfos[randInt(&e->randState, 0, NUM_WEAPONS - 1)]; + if (e->isTraining && randFloat(&e->rng, 0.0f, 1.0f) < 0.25f) { + e->defaultWeapon = weaponInfos[randInt(&e->rng, 0, NUM_WEAPONS - 1)]; } uint16_t cellIdx = 0; @@ -576,7 +576,13 @@ bool posValidDroneSpawnPoint(const iwEnv *e, const b2Vec2 pos) { return true; } +bool MAPS_INITIALIZED = false; + void initMaps(iwEnv *e) { + if (MAPS_INITIALIZED) { + return; + } + for (uint8_t i = 0; i < NUM_MAPS; i++) { setupMap(e, i); mapEntry *map = maps[i]; @@ -631,6 +637,8 @@ void initMaps(iwEnv *e) { } e->mapIdx = -1; + + MAPS_INITIALIZED = true; } void destroyMaps() { diff --git a/ocean/impulse_wars/render.h b/ocean/impulse_wars/render.h index d4300d5538..0b46c74d98 100644 --- a/ocean/impulse_wars/render.h +++ b/ocean/impulse_wars/render.h @@ -809,7 +809,7 @@ void renderUI(const iwEnv *e, const bool starting) { char *playerType = ""; if (droneControlledByHuman(e, drone->idx)) { playerType = "Human"; - } else if (drone->idx < e->numAgents) { + } else if (drone->idx < e->num_agents) { playerType = "NN"; } else { if (e->sittingDuck) { @@ -1211,7 +1211,7 @@ void renderDroneGuides(iwEnv *e, const droneEntity *drone, const bool ending) { if (!b2VecEqual(drone->lastMove, b2Vec2_zero) && !ending) { const float moveMagnitude = b2Length(drone->lastMove); const float thrusterAngle = RAD2DEG * b2Atan2(-drone->lastMove.y, -drone->lastMove.x); - const float flickerWidth = randFloat(&e->randState, -0.05f, 0.05f); + const float flickerWidth = randFloat(&e->rng, -0.05f, 0.05f); const float thrusterWidth = 2.5f * ((halfDroneRadius * moveMagnitude) + halfDroneRadius + flickerWidth); const b2Vec2 thrusterPos = b2MulAdd(drone->pos, -thrusterWidth / 2.0f, drone->lastMove); const Color thrusterColor = Fade(getDroneColor(drone->idx), 0.9); diff --git a/ocean/impulse_wars/types.h b/ocean/impulse_wars/types.h index 9bad059336..113060ba2e 100644 --- a/ocean/impulse_wars/types.h +++ b/ocean/impulse_wars/types.h @@ -413,7 +413,7 @@ typedef struct debugPoint { typedef struct iwEnv { uint8_t numDrones; - uint8_t numAgents; + uint8_t num_agents; uint8_t numTeams; bool teamsEnabled; bool sittingDuck; @@ -439,15 +439,15 @@ typedef struct iwEnv { uint8_t *observations; float *rewards; float *actions; - uint8_t *masks; - uint8_t *terminals; - uint8_t *truncations; + //uint8_t *masks; + float *terminals; + //uint8_t *truncations; uint8_t frameRate; float deltaTime; uint8_t frameSkip; uint8_t box2dSubSteps; - uint64_t randState; + uint64_t rng; bool needsReset; uint16_t episodeLength; From 498681421c8c64023504b0ec39f96618b1c75c80 Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Tue, 28 Apr 2026 14:42:56 -0400 Subject: [PATCH 3/4] trains without crashing Co-authored-by: Copilot --- build.sh | 27 ++- config/impulse_wars.ini | 130 +------------ .../impulse_wars/.clang-format | 1 - ocean/impulse_wars/binding.c | 72 +++---- ocean/impulse_wars/env.h | 57 +++--- ocean/impulse_wars/helpers.h | 14 -- ocean/impulse_wars/impulse_wars.c | 8 +- 
ocean/impulse_wars/impulse_wars.py | 181 ------------------ ocean/impulse_wars/pyproject.toml | 25 --- ocean/impulse_wars/types.h | 7 +- pufferlib/pufferl.py | 2 +- .../impulse_wars/shaders/gls330/bloom.fs | 2 +- resources/impulse_wars/shaders/gls330/blur.fs | 2 +- src/bindings.cu | 4 +- src/bindings_cpu.cpp | 2 +- src/pufferlib.cu | 2 +- 16 files changed, 89 insertions(+), 447 deletions(-) rename .clang-format => ocean/impulse_wars/.clang-format (99%) delete mode 100644 ocean/impulse_wars/impulse_wars.py delete mode 100644 ocean/impulse_wars/pyproject.toml diff --git a/build.sh b/build.sh index 6cb06a7403..46beafaceb 100755 --- a/build.sh +++ b/build.sh @@ -50,20 +50,23 @@ if [ "$ENV" = "all" ]; then exit 0 fi +STANDALONE_LDFLAGS=(-fuse-ld=lld) +SHARED_LDFLAGS=(-fuse-ld=lld) + # Linux/mac PLATFORM="$(uname -s)" if [ "$PLATFORM" = "Linux" ]; then RAYLIB_NAME='raylib-5.5_linux_amd64' OMP_LIB=-lomp5 SANITIZE_FLAGS=(-fsanitize=address,undefined,bounds,pointer-overflow,leak -fno-omit-frame-pointer) - STANDALONE_LDFLAGS=(-lGL) - SHARED_LDFLAGS=(-Bsymbolic-functions) + STANDALONE_LDFLAGS+=(-lGL) + SHARED_LDFLAGS+=(-Bsymbolic-functions) else RAYLIB_NAME='raylib-5.5_macos' OMP_LIB=-lomp SANITIZE_FLAGS=() - STANDALONE_LDFLAGS=(-framework Cocoa -framework IOKit -framework CoreVideo -framework OpenGL) - SHARED_LDFLAGS=(-framework Cocoa -framework OpenGL -framework IOKit -undefined dynamic_lookup) + STANDALONE_LDFLAGS+=(-framework Cocoa -framework IOKit -framework CoreVideo -framework OpenGL) + SHARED_LDFLAGS+=(-framework Cocoa -framework OpenGL -framework IOKit -undefined dynamic_lookup) fi CLANG_WARN=( @@ -120,9 +123,13 @@ elif [ "$ENV" = "impulse_wars" ]; then BOX2D_URL="https://github.com/capnspacehook/box2d/releases/latest/download" download "$BOX2D_NAME" "$BOX2D_URL/$BOX2D_NAME.tar.gz" INCLUDES+=(-I./$BOX2D_NAME/include -I./$BOX2D_NAME/src) - LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a") - - CLANG_OPT=(-flto -fno-math-errno -march=native) + + if [ -z "$DEBUG" ]; then + CLANG_OPT+=(-flto -fno-math-errno -march=native) + LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a") + else + LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2dd.a") + fi elif [ -d "ocean/$ENV" ]; then SRC_DIR="ocean/$ENV" else @@ -137,9 +144,9 @@ if [ -n "$DEBUG" ] || [ "$MODE" = "local" ]; then NVCC_OPT="-O0 -g" LINK_OPT="-g" else - CLANG_OPT+=(-O2 -DNDEBUG "${CLANG_WARN[@]}") - NVCC_OPT="-O2 --threads 0" - LINK_OPT="-O2" + CLANG_OPT+=(-O3 -DNDEBUG "${CLANG_WARN[@]}") + NVCC_OPT="-O3 --threads 0" + LINK_OPT="-O3" fi if [ "$MODE" = "local" ] || [ "$MODE" = "fast" ]; then FLAGS=( diff --git a/config/impulse_wars.ini b/config/impulse_wars.ini index 772c08306f..a252984ddc 100644 --- a/config/impulse_wars.ini +++ b/config/impulse_wars.ini @@ -1,28 +1,11 @@ [base] env_name = impulse_wars -max_suggestion_cost = 10_800 - -[policy] -hidden_size = 512 -cnn_channels = 64 - -# These must match what's set in env below -continuous = False -num_drones = 2 -is_training = True - -[vec] -num_envs = 4 -#num_workers = 4 -#batch_size = 4 - [env] -num_envs = 1024 num_drones = 2 num_agents = 1 enable_teams = False -sitting_duck = False +sitting_duck = True continuous = False is_training = True @@ -40,114 +23,3 @@ reward_explosion_hit_coef = 0.005 [train] total_timesteps = 1_000_000_000 checkpoint_interval = 250 - -learning_rate = 0.005 - -compile = False -compile_mode = reduce-overhead -compile_fullgraph = False - -[sweep] -downsample = 10 -max_cost = 900 - -[sweep.env.num_envs] -distribution = uniform_pow2 -min = 1 -max = 1024 -mean = 128 -scale = auto - -# reward 
parameters -[sweep.env.reward_win] -distribution = uniform -min = 0.0 -mean = 2.0 -max = 5.0 -scale = auto - -[sweep.env.reward_self_kill] -distribution = uniform -min = -3.0 -mean = -1.0 -max = 0.0 -scale = auto - -[sweep.env.reward_enemy_death] -distribution = uniform -min = 0.0 -mean = 1.0 -max = 3.0 -scale = auto - -[sweep.env.reward_kill] -distribution = uniform -min = 0.0 -mean = 1.0 -max = 3.0 -scale = auto - -[sweep.env.reward_death] -distribution = uniform -min = -1.0 -mean = -0.25 -max = 0.0 -scale = auto - -[sweep.env.reward_energy_emptied] -distribution = uniform -min = -2.0 -mean = -0.75 -max = 0.0 -scale = auto - -[sweep.env.reward_weapon_pickup] -distribution = uniform -min = 0.0 -mean = 0.5 -max = 3.0 -scale = auto - -[sweep.env.reward_shield_break] -distribution = uniform -min = 0.0 -mean = 0.5 -max = 3.0 -scale = auto - -[sweep.env.reward_shot_hit_coef] -distribution = log_normal -min = 0.0005 -mean = 0.005 -max = 0.05 -scale = auto - -[sweep.env.reward_explosion_hit_coef] -distribution = log_normal -min = 0.0005 -mean = 0.005 -max = 0.05 -scale = auto - -# hyperparameters -[sweep.train.total_timesteps] -distribution = log_normal -min = 250_000_000 -max = 1_500_000_000 -mean = 500_000_000 -scale = time - -[sweep.train.batch_size] -distribution = uniform_pow2 -min = 65_536 -max = 1_048_576 -mean = 262_144 -scale = auto - -[sweep.train.horizon] -distribution = uniform_pow2 -min = 64 -max = 256 -mean = 128 -scale = auto - diff --git a/.clang-format b/ocean/impulse_wars/.clang-format similarity index 99% rename from .clang-format rename to ocean/impulse_wars/.clang-format index d9ba19d3de..98f71421ec 100644 --- a/.clang-format +++ b/ocean/impulse_wars/.clang-format @@ -85,7 +85,6 @@ BreakAfterAttributes: Leave BreakAfterJavaFieldAnnotations: false BreakArrays: true BreakBeforeBinaryOperators: None -BreakBeforeClosingBracket: Always BreakBeforeConceptDeclarations: Always BreakBeforeBraces: Custom BreakBeforeInlineASMColon: OnlyMultiline diff --git a/ocean/impulse_wars/binding.c b/ocean/impulse_wars/binding.c index 170421963b..0e4186fbf2 100644 --- a/ocean/impulse_wars/binding.c +++ b/ocean/impulse_wars/binding.c @@ -16,10 +16,10 @@ #define DICTGET(key) dict_get(kwargs, key)->value -void my_init(Env* env, Dict* kwargs) { +void my_init(Env *env, Dict *kwargs) { initEnv( env, - 2, + MAX_DRONES, 1, -1, 0, @@ -48,17 +48,26 @@ void my_init(Env* env, Dict* kwargs) { initMaps(env); } -#define _LOG_BUF_SIZE 128 - -char *droneLog(char *buf, const uint8_t droneIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s", droneIdx, name); - return buf; -} - -char *weaponLog(char *buf, const uint8_t droneIdx, const uint8_t weaponIdx, const char *name) { - snprintf(buf, _LOG_BUF_SIZE, "drone_%d_%s_%s", droneIdx, weaponNames[weaponIdx], name); - return buf; -} +#define LOG_DRONE_STATS(log, out, idx, idxStr) \ + dict_set(out, "drone_" idxStr "_returns", log->stats[idx].returns); \ + dict_set(out, "drone_" idxStr "_distance_traveled", log->stats[idx].distanceTraveled); \ + dict_set(out, "drone_" idxStr "_abs_distance_traveled", log->stats[idx].absDistanceTraveled); \ + dict_set(out, "drone_" idxStr "_brake_time", log->stats[idx].brakeTime); \ + dict_set(out, "drone_" idxStr "_total_bursts", log->stats[idx].totalBursts); \ + dict_set(out, "drone_" idxStr "_bursts_hit", log->stats[idx].burstsHit); \ + dict_set(out, "drone_" idxStr "_energy_emptied", log->stats[idx].energyEmptied); \ + dict_set(out, "drone_" idxStr "_shields_broken", log->stats[idx].shieldsBroken); \ + dict_set(out, 
"drone_" idxStr "_own_shield_broken", log->stats[idx].ownShieldBroken); \ + dict_set(out, "drone_" idxStr "_self_kills", log->stats[idx].selfKills); \ + dict_set(out, "drone_" idxStr "_kills", log->stats[idx].kills); \ + dict_set(out, "drone_" idxStr "_unknown_kills", log->stats[idx].unknownKills); \ + dict_set(out, "drone_" idxStr "_wins", log->stats[idx].wins); \ + dict_set(out, "drone_" idxStr "_total_shots_fired", log->stats[idx].totalShotsFired); \ + dict_set(out, "drone_" idxStr "_total_shots_hit", log->stats[idx].totalShotsHit); \ + dict_set(out, "drone_" idxStr "_total_shots_taken", log->stats[idx].totalShotsTaken); \ + dict_set(out, "drone_" idxStr "_total_own_shots_taken", log->stats[idx].totalOwnShotsTaken); \ + dict_set(out, "drone_" idxStr "_total_picked_up", log->stats[idx].totalWeaponsPickedUp); \ + dict_set(out, "drone_" idxStr "_total_shot_distances", log->stats[idx].totalShotDistances) void my_log(Log *log, Dict *out) { dict_set(out, "episode_length", log->length); @@ -67,39 +76,6 @@ void my_log(Log *log, Dict *out) { dict_set(out, "perf", log->stats[0].wins); dict_set(out, "score", log->stats[0].wins); - char buf[_LOG_BUF_SIZE] = {0}; - for (uint8_t i = 0; i < MAX_DRONES; i++) { - dict_set(out, droneLog(buf, i, "returns"), log->stats[i].returns); - dict_set(out, droneLog(buf, i, "distance_traveled"), log->stats[i].distanceTraveled); - dict_set(out, droneLog(buf, i, "abs_distance_traveled"), log->stats[i].absDistanceTraveled); - dict_set(out, droneLog(buf, i, "brake_time"), log->stats[i].brakeTime); - dict_set(out, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); - dict_set(out, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); - dict_set(out, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); - dict_set(out, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); - dict_set(out, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); - dict_set(out, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); - dict_set(out, droneLog(buf, i, "kills"), log->stats[i].kills); - dict_set(out, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); - dict_set(out, droneLog(buf, i, "wins"), log->stats[i].wins); - - // useful for debugging weapon balance, but really slows down - // sweeps due to adding a ton of extra logging data - // - // for (uint8_t j = 0; j < _NUM_WEAPONS; j++) { - // dict_set(out, weaponLog(buf, i, j, "shots_fired"), log->stats[i].shotsFired[j]); - // dict_set(out, weaponLog(buf, i, j, "shots_hit"), log->stats[i].shotsHit[j]); - // dict_set(out, weaponLog(buf, i, j, "shots_taken"), log->stats[i].shotsTaken[j]); - // dict_set(out, weaponLog(buf, i, j, "own_shots_taken"), log->stats[i].ownShotsTaken[j]); - // dict_set(out, weaponLog(buf, i, j, "picked_up"), log->stats[i].weaponsPickedUp[j]); - // dict_set(out, weaponLog(buf, i, j, "shot_distances"), log->stats[i].shotDistances[j]); - // } - - dict_set(out, droneLog(buf, i, "total_shots_fired"), log->stats[i].totalShotsFired); - dict_set(out, droneLog(buf, i, "total_shots_hit"), log->stats[i].totalShotsHit); - dict_set(out, droneLog(buf, i, "total_shots_taken"), log->stats[i].totalShotsTaken); - dict_set(out, droneLog(buf, i, "total_own_shots_taken"), log->stats[i].totalOwnShotsTaken); - dict_set(out, droneLog(buf, i, "total_picked_up"), log->stats[i].totalWeaponsPickedUp); - dict_set(out, droneLog(buf, i, "total_shot_distances"), log->stats[i].totalShotDistances); - } + LOG_DRONE_STATS(log, out, 0, "0"); + LOG_DRONE_STATS(log, out, 1, "1"); } 
diff --git a/ocean/impulse_wars/env.h b/ocean/impulse_wars/env.h
index 3fc90be472..2a45003a15 100644
--- a/ocean/impulse_wars/env.h
+++ b/ocean/impulse_wars/env.h
@@ -463,6 +463,7 @@ void computeObs(iwEnv *e) {
 }
 
 void setupEnv(iwEnv *e) {
+    e->isSetup = true;
     e->needsReset = false;
 
     e->stepsLeft = e->totalSteps;
@@ -557,10 +558,11 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui
 
     e->continuousActions = continuousActions;
 
-    //e->truncations = fastCalloc(numDrones, sizeof(uint8_t));
+    // e->truncations = fastCalloc(numDrones, sizeof(uint8_t));
     setEnvFrameRate(e);
 
     e->rng = seed;
+    e->isSetup = false;
     e->needsReset = false;
 
     b2WorldDef worldDef = b2DefaultWorldDef();
@@ -620,8 +622,8 @@ void setRewards(iwEnv *e, float winReward, float selfKillPunishment, float enemy
 void clearEnv(iwEnv *e) {
     // rewards get cleared in stepEnv every step
     // memset(e->masks, 1, e->num_agents * sizeof(uint8_t));
-    memset(e->terminals, 0.0f, e->num_agents * sizeof(uint8_t));
-    //memset(e->truncations, 0x0, e->num_agents * sizeof(uint8_t));
+    memset(e->terminals, 0.0f, e->num_agents * sizeof(float));
+    // memset(e->truncations, 0x0, e->num_agents * sizeof(uint8_t));
 
     e->episodeLength = 0;
     memset(e->stats, 0x0, sizeof(e->stats));
@@ -666,30 +668,33 @@ void clearEnv(iwEnv *e) {
 }
 
 void destroyEnv(iwEnv *e) {
-    clearEnv(e);
+    if (e->isSetup) {
+        clearEnv(e);
 
-    for (uint8_t i = 0; i < NUM_MAPS; i++) {
-        pathingInfo *info = &e->mapPathing[i];
-        fastFree(info->paths);
-        fastFree(info->pathBuffer);
-    }
-    fastFree(e->mapPathing);
+        for (size_t i = 0; i < cc_array_size(e->walls); i++) {
+            wallEntity *wall = safe_array_get_at(e->walls, i);
+            destroyWall(e, wall, false);
+        }
 
-    for (size_t i = 0; i < cc_array_size(e->walls); i++) {
-        wallEntity *wall = safe_array_get_at(e->walls, i);
-        destroyWall(e, wall, false);
-    }
+        for (size_t i = 0; i < cc_array_size(e->cells); i++) {
+            mapCell *cell = safe_array_get_at(e->cells, i);
+            fastFree(cell);
+        }
 
-    for (size_t i = 0; i < cc_array_size(e->cells); i++) {
-        mapCell *cell = safe_array_get_at(e->cells, i);
-        fastFree(cell);
-    }
+        for (size_t i = 0; i < cc_array_size(e->entities); i++) {
+            entity *ent = safe_array_get_at(e->entities, i);
+            fastFree(ent->id);
+            fastFree(ent);
+        }
 
-    for (size_t i = 0; i < cc_array_size(e->entities); i++) {
-        entity *ent = safe_array_get_at(e->entities, i);
-        fastFree(ent->id);
-        fastFree(ent);
+        for (uint8_t i = 0; i < NUM_MAPS; i++) {
+            pathingInfo *info = &e->mapPathing[i];
+            fastFree(info->paths);
+            fastFree(info->pathBuffer);
+        }
+        fastFree(e->mapPathing);
     }
+
     b2DestroyIdPool(&e->idPool);
 
     cc_array_destroy(e->entities);
@@ -711,7 +716,9 @@ void destroyEnv(iwEnv *e) {
 }
 
 void resetEnv(iwEnv *e) {
-    clearEnv(e);
+    if (e->isSetup) {
+        clearEnv(e);
+    }
     setupEnv(e);
 }
@@ -821,7 +828,7 @@ agentActions _computeActions(iwEnv *e, droneEntity *drone, const agentActions *m
     agentActions actions = {0};
 
     if (manualActions == NULL) {
-        float (*envActions)[7] = (float(*)[7])e->actions;
+        float (*envActions)[7] = (float (*)[7])e->actions;
 
         uint8_t move = envActions[drone->idx][0];
         // 0 is no-op for both move and aim
@@ -1247,7 +1254,7 @@ void stepEnv(iwEnv *e) {
         // }
 
         DEBUG_LOG("terminating episode");
-        memset(e->terminals, 1.0f,  e->num_agents * sizeof(float));
+        for (uint8_t a = 0; a < e->num_agents; a++) { e->terminals[a] = 1.0f; } // memset writes a byte pattern and can't store 1.0f into floats
 
         Log log = {0};
         log.length = e->episodeLength;
diff --git a/ocean/impulse_wars/helpers.h b/ocean/impulse_wars/helpers.h
index 7fb6a8305a..4458ffae71 100644
--- a/ocean/impulse_wars/helpers.h
+++ b/ocean/impulse_wars/helpers.h
@@ -109,26 +109,12 @@ ASSERTF(fabs(vec.y - norm.y) < 0.000001f, "vec: %f, %f norm: %f, %f", vec.x, vec.y, norm.x, norm.y); \ } while (0) -// use malloc when debugging so the address sanitizer can find issues with -// heap memory, use dlmalloc in release mode for performance; emscripten -// uses dlmalloc by default so no need to change anything here; dlmalloc -// sometimes won't compile on macOS so just use malloc and friends -#if !defined(NDEBUG) || defined(__EMSCRIPTEN__) || defined(__APPLE__) #define fastMalloc(size) malloc(size) #define fastMallocFn malloc #define fastCalloc(nmemb, size) calloc(nmemb, size) #define fastCallocFn calloc #define fastFree(ptr) free(ptr) #define fastFreeFn free -#else -#include "dlmalloc.h" -#define fastMalloc(size) dlmalloc(size) -#define fastMallocFn dlmalloc -#define fastCalloc(nmemb, size) dlcalloc(nmemb, size) -#define fastCallocFn dlcalloc -#define fastFree(ptr) dlfree(ptr) -#define fastFreeFn dlfree -#endif static inline void create_array(CC_Array **array, size_t initialCap) { CC_ArrayConf conf; diff --git a/ocean/impulse_wars/impulse_wars.c b/ocean/impulse_wars/impulse_wars.c index 26db5ae5a6..ec8e82cab3 100644 --- a/ocean/impulse_wars/impulse_wars.c +++ b/ocean/impulse_wars/impulse_wars.c @@ -16,9 +16,9 @@ int main(void) { posix_memalign((void **)&e->observations, sizeof(void *), alignedSize(NUM_DRONES * obsBytes(NUM_DRONES), sizeof(float))); e->rewards = fastCalloc(NUM_DRONES, sizeof(float)); e->actions = fastCalloc(NUM_DRONES * CONTINUOUS_ACTION_SIZE, sizeof(float)); - //e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + // e->masks = fastCalloc(NUM_DRONES, sizeof(uint8_t)); e->terminals = fastCalloc(NUM_DRONES, sizeof(float)); - //e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); + // e->truncations = fastCalloc(NUM_DRONES, sizeof(uint8_t)); rayClient *client = createRayClient(); e->client = client; @@ -41,9 +41,9 @@ int main(void) { free(e->observations); fastFree(e->actions); fastFree(e->rewards); - //fastFree(e->masks); + // fastFree(e->masks); fastFree(e->terminals); - //fastFree(e->truncations); + // fastFree(e->truncations); fastFree(e); destroyRayClient(client); #endif diff --git a/ocean/impulse_wars/impulse_wars.py b/ocean/impulse_wars/impulse_wars.py deleted file mode 100644 index 6fc2f5d27e..0000000000 --- a/ocean/impulse_wars/impulse_wars.py +++ /dev/null @@ -1,181 +0,0 @@ -from types import SimpleNamespace - -import gymnasium -import numpy as np - -import pufferlib -from pufferlib.ocean.impulse_wars import binding - - -discMoveToContMove = np.array([ - [1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0, 0.707107, 0.0], - [0.0, 0.707107, 1.0, 0.707107, 0.0, -0.707107, -1.0, -0.707107, 0.0], -], dtype=np.float32) -discAimToContAim = np.array([ - [1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0, 0.382683, 0.707107, 0.92388, 0.0], - [0.0, 0.382683, 0.707107, 0.92388, 1.0, 0.92388, 0.707107, 0.382683, 0.0, -0.382683, -0.707107, -0.92388, -1.0, -0.92388, -0.707107, -0.382683, 0.0], -], dtype=np.float32) - - -class ImpulseWars(pufferlib.PufferEnv): - def __init__( - self, - num_envs: int = 1, - num_drones: int = 2, - num_agents: int = 1, - enable_teams: bool = False, - sitting_duck: bool = False, - continuous: bool = False, - is_training: bool = True, - human_control: bool = False, - reward_win: float = 2.0, - reward_self_kill: float = -1.0, - reward_enemy_death: float = 1.0, - reward_enemy_kill: float = 1.0, - reward_death: float = -0.25, - reward_energy_emptied: 
float = -0.75, - reward_weapon_pickup: float = 0.5, - reward_shield_break: float = 0.5, - reward_shot_hit_coef: float = 0.005, - reward_explosion_hit_coef: float = 0.005, - seed: int = 0, - render: bool = False, - report_interval: int = 64, - buf = None, - ): - self.obsInfo = SimpleNamespace(**binding.get_consts(num_drones)) - - if num_envs <= 0: - raise ValueError("num_envs must be greater than 0") - if num_drones > self.obsInfo.maxDrones or num_drones <= 0: - raise ValueError(f"num_drones must greater than 0 and less than or equal to {self.obsInfo.maxDrones}") - if num_agents > num_drones or num_agents <= 0: - raise ValueError("num_agents must greater than 0 and less than or equal to num_drones") - if enable_teams and (num_drones % 2 != 0 or num_drones <= 2): - raise ValueError("enable_teams is only supported for even numbers of drones greater than 2") - - self.numDrones = num_drones - self.continuous = continuous - - self.num_agents = num_agents * num_envs - self.tick = 0 - - # map observations are bit packed to save space, and scalar - # observations need to be floats - self.single_observation_space = gymnasium.spaces.Box( - low=0, high=255, shape=(self.obsInfo.obsBytes,), dtype=np.uint8 - ) - - if self.continuous: - # action space is actually bounded by (-1, 1) but pufferlib - # will check that actions are within the bounds of the action - # space before actions get to the env, and we ensure the actions - # are bounded there; so set bounds to (-inf, inf) here so - # action bounds checks pass - self.single_action_space = gymnasium.spaces.Box( - low=float("-inf"), high=float("inf"), shape=(self.obsInfo.contActionsSize,), dtype=np.float32 - ) - else: - self.single_action_space = gymnasium.spaces.MultiDiscrete( - [ - 9, # move, noop + 8 directions - 17, # aim, noop + 16 directions - 2, # shoot or not - 2, # brake or not - 2, # burst - ] - ) - - self.report_interval = report_interval - self.render_mode = "human" if render else None - - super().__init__(buf) - if not self.continuous: - self.actions = np.zeros((self.num_agents, self.obsInfo.contActionsSize), dtype=np.float32) - - self.c_envs = binding.vec_init( - self.observations, - self.actions, - self.rewards, - self.terminals, - self.truncations, - num_envs, - seed, - num_drones=num_drones, - num_agents=num_agents, - map_idx=-1, - enable_teams=enable_teams, - sitting_duck=sitting_duck, - is_training=is_training, - continuous=continuous, - reward_win=reward_win, - reward_self_kill=reward_self_kill, - reward_enemy_death=reward_enemy_death, - reward_enemy_kill=reward_enemy_kill, - reward_death=reward_death, - reward_energy_emptied=reward_energy_emptied, - reward_weapon_pickup=reward_weapon_pickup, - reward_shield_break=reward_shield_break, - reward_shot_hit_coef=reward_shot_hit_coef, - reward_explosion_hit_coef=reward_explosion_hit_coef, - ) - - binding.shared(self.c_envs) - - def reset(self, seed=None): - self.tick = 0 - if seed is None: - binding.vec_reset(self.c_envs, 0) - else: - binding.vec_reset(self.c_envs, seed) - return self.observations, [] - - def step(self, actions): - if self.continuous: - self.actions[:] = actions - else: - contMove = discMoveToContMove[:, actions[:, 0]].T - contAim = discAimToContAim[:, actions[:, 1]].T - contRest = actions[:, 2:].astype(np.float32) - self.actions[:] = np.concatenate([contMove, contAim, contRest], axis=1) - - self.tick += 1 - binding.vec_step(self.c_envs) - - infos = [] - if self.tick % self.report_interval == 0: - infos.append(binding.vec_log(self.c_envs)) - - return self.observations, 
self.rewards, self.terminals, self.truncations, infos - - def render(self): - binding.vec_render(self.c_envs, 0) - - def close(self): - binding.vec_close(self.c_envs) - - -def testPerf(timeout, actionCache, numEnvs): - env = ImpulseWars(numEnvs) - - import time - - np.random.seed(int(time.time())) - actions = np.random.uniform(-1, 1, (actionCache, env.num_agents, 7)) - - tick = 0 - start = time.time() - while time.time() - start < timeout: - action = actions[tick % actionCache] - env.step(action) - tick += 1 - - sps = numEnvs * (tick / (time.time() - start)) - print(f"SPS: {sps:,}") - print(f"Steps: {numEnvs * tick}") - - env.close() - - -if __name__ == "__main__": - testPerf(timeout=5, actionCache=1024, numEnvs=1) diff --git a/ocean/impulse_wars/pyproject.toml b/ocean/impulse_wars/pyproject.toml deleted file mode 100644 index df67b2bd17..0000000000 --- a/ocean/impulse_wars/pyproject.toml +++ /dev/null @@ -1,25 +0,0 @@ -[build-system] -requires = ["scikit-build-core>=0.10", "autopxd2>=2.5.0", "cython>=3.0.11"] -build-backend = "scikit_build_core.build" - -[project] -name = "binding" -version = "1.0.0" -requires-python = ">=3.11" - -[tool.scikit-build] -minimum-version = "build-system.requires" -cmake.build-type = "Release" -build.verbose = true -logging.level = "INFO" - -[tool.scikit-build.cmake.define] -BUILD_PYTHON_MODULE = true -CMAKE_C_COMPILER = "clang-20" - -[tool.ruff] -line-length = 110 - -[tool.ruff.lint] -# skip "Module level import not at top of file" -ignore = ["E402"] diff --git a/ocean/impulse_wars/types.h b/ocean/impulse_wars/types.h index 113060ba2e..682b280e72 100644 --- a/ocean/impulse_wars/types.h +++ b/ocean/impulse_wars/types.h @@ -10,7 +10,7 @@ #include "settings.h" -#define _MAX_DRONES 4 +#define _MAX_DRONES 2 const uint8_t NUM_WALL_TYPES = 3; @@ -439,15 +439,16 @@ typedef struct iwEnv { uint8_t *observations; float *rewards; float *actions; - //uint8_t *masks; + // uint8_t *masks; float *terminals; - //uint8_t *truncations; + // uint8_t *truncations; uint8_t frameRate; float deltaTime; uint8_t frameSkip; uint8_t box2dSubSteps; uint64_t rng; + bool isSetup; bool needsReset; uint16_t episodeLength; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 8fc0c03a89..19c54791aa 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -146,7 +146,7 @@ def print_dashboard(args, model_size, flat_logs, clear=False, idx=[0], u = left if i % 2 == 0 else right u.add_row(f'{b2}{k[4:]}', f'{b2}{v:.3f}') i += 1 - if i == 30: + if i == 60: break if clear: diff --git a/resources/impulse_wars/shaders/gls330/bloom.fs b/resources/impulse_wars/shaders/gls330/bloom.fs index 246acb6af8..bcd2fa189d 100644 --- a/resources/impulse_wars/shaders/gls330/bloom.fs +++ b/resources/impulse_wars/shaders/gls330/bloom.fs @@ -23,7 +23,7 @@ #define BLOOM_ADDITIVE 1 #define BLOOM_SOFT_LIGHT 2 -noperspective in vec2 fragTexCoord; +in vec2 fragTexCoord; uniform sampler2D uTexColor; uniform sampler2D uTexBloomBlur; diff --git a/resources/impulse_wars/shaders/gls330/blur.fs b/resources/impulse_wars/shaders/gls330/blur.fs index f853a495e1..880d2ad2cc 100644 --- a/resources/impulse_wars/shaders/gls330/blur.fs +++ b/resources/impulse_wars/shaders/gls330/blur.fs @@ -22,7 +22,7 @@ #version 330 core -noperspective in vec2 fragTexCoord; +in vec2 fragTexCoord; uniform sampler2D uTexture; uniform vec2 uTexelDir; diff --git a/src/bindings.cu b/src/bindings.cu index 4469cb512c..4f413e8983 100644 --- a/src/bindings.cu +++ b/src/bindings.cu @@ -106,7 +106,7 @@ pybind11::dict puf_eval_log(pybind11::object 
pufferl_obj) { pufferl.last_log_step = pufferl.global_step; pybind11::dict env_dict; - Dict* env_out = create_dict(32); + Dict* env_out = create_dict(64); static_vec_eval_log(pufferl.vec, env_out); for (int i = 0; i < env_out->size; i++) { env_dict[env_out->items[i].key] = env_out->items[i].value; @@ -318,7 +318,7 @@ void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) { } py::dict vec_log(VecEnv& ve) { - Dict* out = create_dict(32); + Dict* out = create_dict(64); static_vec_log(ve.vec, out); py::dict result; for (int i = 0; i < out->size; i++) { diff --git a/src/bindings_cpu.cpp b/src/bindings_cpu.cpp index 5ba4dc81e5..a4e0b7633c 100644 --- a/src/bindings_cpu.cpp +++ b/src/bindings_cpu.cpp @@ -141,7 +141,7 @@ static void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) { } static py::dict vec_log(VecEnv& ve) { - Dict* out = create_dict(32); + Dict* out = create_dict(64); static_vec_log(ve.vec, out); py::dict result; for (int i = 0; i < out->size; i++) diff --git a/src/pufferlib.cu b/src/pufferlib.cu index 3a3e6ee00e..c74d3553ee 100644 --- a/src/pufferlib.cu +++ b/src/pufferlib.cu @@ -330,7 +330,7 @@ typedef struct { } PuffeRL; Dict* log_environments_impl(PuffeRL& pufferl) { - Dict* out = create_dict(32); + Dict* out = create_dict(64); static_vec_log(pufferl.vec, out); return out; } From 3c9ae6fc77aaf520f35793d7ae1dee6e4661ce9f Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Tue, 28 Apr 2026 15:49:06 -0400 Subject: [PATCH 4/4] removed now unused dlmalloc dep --- vendor/dlmalloc.h | 6264 --------------------------------------------- 1 file changed, 6264 deletions(-) delete mode 100644 vendor/dlmalloc.h diff --git a/vendor/dlmalloc.h b/vendor/dlmalloc.h deleted file mode 100644 index 4ef7c9cfd5..0000000000 --- a/vendor/dlmalloc.h +++ /dev/null @@ -1,6264 +0,0 @@ -/* - Default header file for malloc-2.7.2, written by Doug Lea - and released to the public domain. Use, modify, and redistribute - this code without permission or acknowledgement in any way you wish. - Send questions, comments, complaints, performance data, etc to - dl@cs.oswego.edu. - - last update: Sun Feb 25 18:38:11 2001 Doug Lea (dl at gee) - - This header is for ANSI C/C++ only. You can set either of - the following #defines before including: - - * If USE_DL_PREFIX is defined, it is assumed that malloc.c - was also compiled with this option, so all routines - have names starting with "dl". - - * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this - file will be #included AFTER . This is needed only if - your system defines a struct mallinfo that is incompatible with the - standard one declared here. Otherwise, you can include this file - INSTEAD of your system system . At least on ANSI, all - declarations should be compatible with system versions -*/ - -#ifndef MALLOC_270_H -#define MALLOC_270_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include /* for size_t */ - -#define USE_DL_PREFIX - -/* - malloc(size_t n) - Returns a pointer to a newly allocated chunk of at least n bytes, or - null if no space is available. Additionally, on failure, errno is - set to ENOMEM on ANSI C systems. - - If n is zero, malloc returns a minimum-sized chunk. The minimum size - is 16 bytes on most 32bit systems, and either 24 or 32 bytes on - 64bit systems, depending on internal size and alignment restrictions. - - On most systems, size_t is an unsigned type. Calls with values of n - that appear "negative" when signed are interpreted as requests for - huge amounts of space, which will most often fail. 
- - The maximum allowed value of n differs across systems, but is in all - cases less (typically by 8K) than the maximum representable value of - a size_t. Requests greater than this value result in failure. -*/ - -#ifndef USE_DL_PREFIX -void *malloc(size_t); -#else -void *dlmalloc(size_t); -#endif - -/* - free(void* p) - Releases the chunk of memory pointed to by p, that had been previously - allocated using malloc or a related routine such as realloc. - It has no effect if p is null. It can have arbitrary (and bad!) - effects if p has already been freed or was not obtained via malloc. - - Unless disabled using mallopt, freeing very large spaces will, - when possible, automatically trigger operations that give - back unused memory to the system, thus reducing program footprint. -*/ -#ifndef USE_DL_PREFIX -void free(void *); -#else -void dlfree(void *); -#endif - -/* - calloc(size_t n_elements, size_t element_size); - Returns a pointer to n_elements * element_size bytes, with all locations - set to zero. -*/ -#ifndef USE_DL_PREFIX -void *calloc(size_t, size_t); -#else -void *dlcalloc(size_t, size_t); -#endif - -/* - realloc(void* p, size_t n) - Returns a pointer to a chunk of size n that contains the same data - as does chunk p up to the minimum of (n, p's size) bytes. - - The returned pointer may or may not be the same as p. The algorithm - prefers extending p when possible, otherwise it employs the - equivalent of a malloc-copy-free sequence. - - If p is null, realloc is equivalent to malloc. - - If space is not available, realloc returns null, errno is set (if on - ANSI) and p is NOT freed. - - if n is for fewer bytes than already held by p, the newly unused - space is lopped off and freed if possible. Unless the #define - REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of - zero (re)allocates a minimum-sized chunk. - - Large chunks that were internally obtained via mmap will always - be reallocated using malloc-copy-free sequences unless - the system supports MREMAP (currently only linux). - - The old unix realloc convention of allowing the last-free'd chunk - to be used as an argument to realloc is not supported. -*/ - -#ifndef USE_DL_PREFIX -void *realloc(void *, size_t); -#else -void *dlrealloc(void *, size_t); -#endif - -/* - memalign(size_t alignment, size_t n); - Returns a pointer to a newly allocated chunk of n bytes, aligned - in accord with the alignment argument. - - The alignment argument should be a power of two. If the argument is - not a power of two, the nearest greater power is used. - 8-byte alignment is guaranteed by normal malloc calls, so don't - bother calling memalign with an argument of 8 or less. - - Overreliance on memalign is a sure way to fragment space. -*/ - -#ifndef USE_DL_PREFIX -void *memalign(size_t, size_t); -#else -void *dlmemalign(size_t, size_t); -#endif - -/* - valloc(size_t n); - Allocates a page-aligned chunk of at least n bytes. - Equivalent to memalign(pagesize, n), where pagesize is the page - size of the system. If the pagesize is unknown, 4096 is used. -*/ - -#ifndef USE_DL_PREFIX -void *valloc(size_t); -#else -void *dlvalloc(size_t); -#endif - -/* - independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); - - independent_calloc is similar to calloc, but instead of returning a - single cleared space, it returns an array of pointers to n_elements - independent elements, each of which can hold contents of size - elem_size. Each element starts out cleared, and can be - independently freed, realloc'ed etc. 
The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null, which is - probably the most typical usage). If it is null, the returned array - is itself dynamically allocated and should also be freed when it is - no longer needed. Otherwise, the chunks array must be of at least - n_elements in length. It is filled in with the pointers to the - chunks. - - In either case, independent_calloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and "chunks" - is null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use regular calloc and assign pointers into this - space to represent elements. (In this case though, you cannot - independently free elements.) - - independent_calloc simplifies and speeds up implementations of many - kinds of pools. It may also be useful when constructing large data - structures that initially have a fixed number of fixed-sized nodes, - but the number is not known at compile time, and some of the nodes - may later need to be freed. For example: - - struct Node { int item; struct Node* next; }; - - struct Node* build_list() { - struct Node** pool; - int n = read_number_of_nodes_needed(); - if (n <= 0) return 0; - pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); - if (pool == 0) return 0; // failure - // organize into a linked list... - struct Node* first = pool[0]; - for (i = 0; i < n-1; ++i) - pool[i]->next = pool[i+1]; - free(pool); // Can now free the array (or not, if it is needed later) - return first; - } -*/ - -#ifndef USE_DL_PREFIX -void **independent_calloc(size_t, size_t, void **); -#else -void **dlindependent_calloc(size_t, size_t, void **); -#endif - -/* - independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); - - independent_comalloc allocates, all at once, a set of n_elements - chunks with sizes indicated in the "sizes" array. It returns - an array of pointers to these elements, each of which can be - independently freed, realloc'ed etc. The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null). If it is null - the returned array is itself dynamically allocated and should also - be freed when it is no longer needed. Otherwise, the chunks array - must be of at least n_elements in length. It is filled in with the - pointers to the chunks. - - In either case, independent_comalloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and chunks is - null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use a single regular malloc, and assign pointers at - particular offsets in the aggregate space. (In this case though, you - cannot independently free elements.) 
- - independent_comallac differs from independent_calloc in that each - element may have a different size, and also that it does not - automatically clear elements. - - independent_comalloc can be used to speed up allocation in cases - where several structs or objects must always be allocated at the - same time. For example: - - struct Head { ... } - struct Foot { ... } - - void send_message(char* msg) { - int msglen = strlen(msg); - size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; - void* chunks[3]; - if (independent_comalloc(3, sizes, chunks) == 0) - die(); - struct Head* head = (struct Head*)(chunks[0]); - char* body = (char*)(chunks[1]); - struct Foot* foot = (struct Foot*)(chunks[2]); - // ... - } - - In general though, independent_comalloc is worth using only for - larger values of n_elements. For small values, you probably won't - detect enough difference from series of malloc calls to bother. - - Overuse of independent_comalloc can increase overall memory usage, - since it cannot reuse existing noncontiguous small chunks that - might be available for some of the elements. -*/ - -#ifndef USE_DL_PREFIX -void **independent_comalloc(size_t, size_t *, void **); -#else -void **dlindependent_comalloc(size_t, size_t *, void **); -#endif - -/* - pvalloc(size_t n); - Equivalent to valloc(minimum-page-that-holds(n)), that is, - round up n to nearest pagesize. - */ - -#ifndef USE_DL_PREFIX -void *pvalloc(size_t); -#else -void *dlpvalloc(size_t); -#endif - -/* - cfree(void* p); - Equivalent to free(p). - - cfree is needed/defined on some systems that pair it with calloc, - for odd historical reasons (such as: cfree is used in example - code in the first edition of K&R). -*/ - -#ifndef USE_DL_PREFIX -void cfree(void *); -#else -void dlcfree(void *); -#endif - -/* - malloc_trim(size_t pad); - - If possible, gives memory back to the system (via negative - arguments to sbrk) if there is unused memory at the `high' end of - the malloc pool. You can call this after freeing large blocks of - memory to potentially reduce the system-level memory requirements - of a program. However, it cannot guarantee to reduce memory. Under - some allocation patterns, some large free blocks of memory will be - locked between two used chunks, so they cannot be given back to - the system. - - The `pad' argument to malloc_trim represents the amount of free - trailing space to leave untrimmed. If this argument is zero, - only the minimum amount of memory to maintain internal data - structures will be left (one page or less). Non-zero arguments - can be supplied to maintain enough trailing space to service - future expected allocations without having to re-obtain memory - from the system. - - Malloc_trim returns 1 if it actually released any memory, else 0. - On systems that do not support "negative sbrks", it will always - return 0. -*/ - -#ifndef USE_DL_PREFIX -int malloc_trim(size_t); -#else -int dlmalloc_trim(size_t); -#endif - -/* - malloc_usable_size(void* p); - - Returns the number of bytes you can actually use in an allocated - chunk, which may be more than you requested (although often not) due - to alignment and minimum size constraints. You can use this many - bytes without worrying about overwriting other allocated - objects. This is not a particularly great programming practice. 
But - malloc_usable_size can be more useful in debugging and assertions, - for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); -*/ - -#ifndef USE_DL_PREFIX -size_t malloc_usable_size(void *); -#else -size_t dlmalloc_usable_size(void *); -#endif - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. -*/ - -#ifndef USE_DL_PREFIX -void malloc_stats(void); -#else -void dlmalloc_stats(void); -#endif - -/* - mallinfo() - Returns (by copy) a struct containing various summary statistics: - - arena: current total non-mmapped bytes allocated from system - ordblks: the number of free chunks - smblks: the number of fastbin blocks (i.e., small chunks that - have been freed but not use resused or consolidated) - hblks: current number of mmapped regions - hblkhd: total bytes held in mmapped regions - usmblks: the maximum total allocated space. This will be greater - than current total if trimming has occurred. - fsmblks: total bytes held in fastbin blocks - uordblks: current total allocated space (normal or mmapped) - fordblks: total free space - keepcost: the maximum number of bytes that could ideally be released - back to system via malloc_trim. ("ideally" means that - it ignores page restrictions etc.) - - The names of some of these fields don't bear much relation with - their contents because this struct was defined as standard in - SVID/XPG so reflects the malloc implementation that was then used - in SystemV Unix. - - The original SVID version of this struct, defined on most systems - with mallinfo, declares all fields as ints. But some others define - as unsigned long. If your system defines the fields using a type of - different width than listed here, you should #include your system - version before including this file. The struct declaration is - suppressed if _MALLOC_H is defined (which is done in most system - malloc.h files). You can also suppress it by defining - HAVE_USR_INCLUDE_MALLOC_H. - - Because these fields are ints, but internal bookkeeping is done with - unsigned longs, the reported values may appear as negative, and may - wrap around zero and thus be inaccurate. -*/ - -#ifndef HAVE_USR_INCLUDE_MALLOC_H -#ifndef _MALLOC_H -struct mallinfo { - int arena; - int ordblks; - int smblks; - int hblks; - int hblkhd; - int usmblks; - int fsmblks; - int uordblks; - int fordblks; - int keepcost; -}; -#endif -#endif - -#ifndef USE_DL_PREFIX -struct mallinfo mallinfo(void); -#else -struct mallinfo mallinfo(void); -#endif - -/* - mallopt(int parameter_number, int parameter_value) - Sets tunable parameters The format is to provide a - (parameter-number, parameter-value) pair. 
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. SVID/XPG defines four standard param numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports four
- other options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol            param #  default    allowed param values
- M_MXFAST          1        64         0-80 (0 disables fastbins)
- M_TRIM_THRESHOLD  -1       256*1024   any (-1U disables trimming)
- M_TOP_PAD         -2       0          any
- M_MMAP_THRESHOLD  -3       256*1024   any (or 0 if no MMAP support)
- M_MMAP_MAX        -4       65536      any (0 disables use of mmap)
-*/
-
-#ifndef USE_DL_PREFIX
-int mallopt(int, int);
-#else
-int dlmallopt(int, int);
-#endif
-
-/* Descriptions of tuning options */
-
-/*
- M_MXFAST is the maximum request size used for "fastbins", special bins
- that hold returned chunks without consolidating their spaces. This
- enables future requests for chunks of the same size to be handled
- very quickly, but can increase fragmentation, and thus increase the
- overall memory footprint of a program.
-
- This malloc manages fastbins very conservatively yet still
- efficiently, so fragmentation is rarely a problem for values less
- than or equal to the default. The maximum supported value of MXFAST
- is 80. You wouldn't want it any higher than this anyway. Fastbins
- are designed especially for use with many small structs, objects or
- strings -- the default handles structs/objects/arrays with sizes up
- to 16 4byte fields, or small strings representing words, tokens,
- etc. Using fastbins for larger objects normally worsens
- fragmentation without improving speed.
-
- You can reduce M_MXFAST to 0 to disable all use of fastbins. This
- causes the malloc algorithm to be a closer approximation of
- fifo-best-fit in all cases, not just for larger requests, but will
- generally cause it to be slower.
-*/
-
-#ifndef M_MXFAST
-#define M_MXFAST 1
-#endif
-
-/*
- M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
- to keep before releasing via malloc_trim in free().
-
- Automatic trimming is mainly useful in long-lived programs.
- Because trimming via sbrk can be slow on some systems, and can
- sometimes be wasteful (in cases where programs immediately
- afterward allocate more large chunks) the value should be high
- enough so that your overall system performance would improve by
- releasing this much memory.
-
- The trim threshold and the mmap control parameters (see below)
- can be traded off with one another. Trimming and mmapping are
- two different ways of releasing unused memory back to the
- system. Between these two, it is often possible to keep
- system-level demands of a long-lived program down to a bare
- minimum. For example, in one test suite of sessions measuring
- the XF86 X server on Linux, using a trim threshold of 128K and a
- mmap threshold of 192K led to near-minimal long term resource
- consumption.
-
- If you are using this malloc in a long-lived program, it should
- pay to experiment with these values. As a rough guide, you
- might set to a value close to the average size of a process
- (program) running on your system. Releasing this much memory
- would allow such a process to run in memory.
- Generally, it's worth it to tune for trimming rather than memory
- mapping when a program undergoes phases where several large chunks
- are allocated and released in ways that can reuse each other's
- storage, perhaps mixed with phases where there are no such
- chunks at all. And in well-behaved long-lived programs,
- controlling release of large blocks via trimming versus mapping
- is usually faster.
-
- However, in most programs, these parameters serve mainly as
- protection against the system-level effects of carrying around
- massive amounts of unneeded memory. Since frequent calls to
- sbrk, mmap, and munmap otherwise degrade performance, the default
- parameters are set to relatively high values that serve only as
- safeguards.
-
- The trim value must be greater than page size to have any useful
- effect. To disable trimming completely, you can set it to
- (unsigned long)(-1).
-
- Trim settings interact with fastbin (MXFAST) settings: Unless
- compiled with TRIM_FASTBINS defined, automatic trimming never takes
- place upon freeing a chunk with size less than or equal to
- MXFAST. Trimming is instead delayed until subsequent freeing of
- larger chunks. However, you can still force an attempted trim by
- calling malloc_trim.
-
- Also, trimming is not generally possible in cases where
- the main arena is obtained via mmap.
-
- Note that the trick some people use of mallocing a huge space and
- then freeing it at program startup, in an attempt to reserve system
- memory, doesn't have the intended effect under automatic trimming,
- since that memory will immediately be returned to the system.
-*/
-
-#define M_TRIM_THRESHOLD -1
-
-/*
- M_TOP_PAD is the amount of extra `padding' space to allocate or
- retain whenever sbrk is called. It is used in two ways internally:
-
- * When sbrk is called to extend the top of the arena to satisfy
-   a new malloc request, this much padding is added to the sbrk
-   request.
-
- * When malloc_trim is called automatically from free(),
-   it is used as the `pad' argument.
-
- In both cases, the actual amount of padding is rounded
- so that the end of the arena is always a system page boundary.
-
- The main reason for using padding is to avoid calling sbrk so
- often. Having even a small pad greatly reduces the likelihood
- that nearly every malloc request during program start-up (or
- after trimming) will invoke sbrk, which needlessly wastes
- time.
-
- Automatic rounding-up to page-size units is normally sufficient
- to avoid measurable overhead, so the default is 0. However, in
- systems where sbrk is relatively slow, it can pay to increase
- this value, at the expense of carrying around more memory than
- the program needs.
-*/
-
-#define M_TOP_PAD -2
-
-/*
- M_MMAP_THRESHOLD is the request size threshold for using mmap()
- to service a request. Requests of at least this size that cannot
- be allocated using already-existing space will be serviced via mmap.
- (If enough normal freed space already exists it is used instead.)
-
- Using mmap segregates relatively large chunks of memory so that
- they can be individually obtained and released from the host
- system. A request serviced through mmap is never reused by any
- other request (at least not directly; the system may just so
- happen to remap successive requests to the same locations).
-
- Segregating space in this way has the benefits that:
-
-  1. Mmapped space can ALWAYS be individually released back
-     to the system, which helps keep the system level memory
-     demands of a long-lived program low.
-  2.
Mapped memory can never become `locked' between - other chunks, as can happen with normally allocated chunks, which - means that even trimming via malloc_trim would not release them. - 3. On some systems with "holes" in address spaces, mmap can obtain - memory that sbrk cannot. - - However, it has the disadvantages that: - - 1. The space cannot be reclaimed, consolidated, and then - used to service later requests, as happens with normal chunks. - 2. It can lead to more wastage because of mmap page alignment - requirements - 3. It causes malloc performance to be more dependent on host - system memory management support routines. - - The advantages of mmap nearly always outweigh disadvantages for - "large" chunks, but the value of "large" varies across systems. The - default is an empirically derived value that works well in most - systems. -*/ - -#define M_MMAP_THRESHOLD -3 - -/* - M_MMAP_MAX is the maximum number of requests to simultaneously - service using mmap. This parameter exists because - some systems have a limited number of internal tables for - use by mmap, and using more than a few of them may degrade - performance. - - The default is set to a value that serves only as a safeguard. - Setting to 0 disables use of mmap for servicing large requests. If - mmap is not supported on a system, the default value is 0, and - attempts to set it to non-zero values in mallopt will fail. -*/ - -#define M_MMAP_MAX -4 - -/* Unused SVID2/XPG mallopt options, listed for completeness */ - -#ifndef M_NBLKS -#define M_NLBLKS 2 /* UNUSED in this malloc */ -#endif -#ifndef M_GRAIN -#define M_GRAIN 3 /* UNUSED in this malloc */ -#endif -#ifndef M_KEEP -#define M_KEEP 4 /* UNUSED in this malloc */ -#endif - -/* - Some malloc.h's declare alloca, even though it is not part of malloc. -*/ - -#ifndef _ALLOCA_H -extern void *alloca(size_t); -#endif - -/* - This is a version (aka dlmalloc) of malloc/free/realloc written by - Doug Lea and released to the public domain. Use, modify, and - redistribute this code without permission or acknowledgement in any - way you wish. Send questions, comments, complaints, performance - data, etc to dl@cs.oswego.edu - -* VERSION 2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) - - Note: There may be an updated version of this malloc obtainable at - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - Check before installing! - -* Quickstart - - This library is all in one file to simplify the most common usage: - ftp it, compile it (-O), and link it into another program. All - of the compile-time options default to reasonable values for use on - most unix platforms. Compile -DWIN32 for reasonable defaults on windows. - You might later want to step through various compile-time and dynamic - tuning options. - - For convenience, an include file for code using this malloc is at: - ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h - You don't really need this .h file unless you call functions not - defined in your system include files. The .h file contains only the - excerpts from this file needed for using this malloc on ANSI C/C++ - systems, so long as you haven't changed compile-time options about - naming and tuning parameters. If you do, then you can create your - own malloc.h that does include all settings by cutting at the point - indicated below. - -* Why use this malloc? - - This is not the fastest, most space-conserving, most portable, or - most tunable malloc ever written. However it is among the fastest - while also being among the most space-conserving, portable and tunable. 
- Consistent balance across these factors results in a good general-purpose - allocator for malloc-intensive programs. - - The main properties of the algorithms are: - * For large (>= 512 bytes) requests, it is a pure best-fit allocator, - with ties normally decided via FIFO (i.e. least recently used). - * For small (<= 64 bytes by default) requests, it is a caching - allocator, that maintains pools of quickly recycled chunks. - * In between, and for combinations of large and small requests, it does - the best it can trying to meet both goals at once. - * For very large requests (>= 128KB by default), it relies on system - memory mapping facilities, if supported. - - For a longer but slightly out of date high-level description, see - http://gee.cs.oswego.edu/dl/html/malloc.html - - You may already by default be using a C library containing a malloc - that is based on some version of this malloc (for example in - linux). You might still want to use the one in this file in order to - customize settings or to avoid overheads associated with library - versions. - -* Contents, described in more detail in "description of public routines" below. - - Standard (ANSI/SVID/...) functions: - malloc(size_t n); - calloc(size_t n_elements, size_t element_size); - free(Void_t* p); - realloc(Void_t* p, size_t n); - memalign(size_t alignment, size_t n); - valloc(size_t n); - mallinfo() - mallopt(int parameter_number, int parameter_value) - - Additional functions: - independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); - independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); - pvalloc(size_t n); - cfree(Void_t* p); - malloc_trim(size_t pad); - malloc_usable_size(Void_t* p); - malloc_stats(); - -* Vital statistics: - - Supported pointer representation: 4 or 8 bytes - Supported size_t representation: 4 or 8 bytes - Note that size_t is allowed to be 4 bytes even if pointers are 8. - You can adjust this by defining INTERNAL_SIZE_T - - Alignment: 2 * sizeof(size_t) (default) - (i.e., 8 byte alignment with 4byte size_t). This suffices for - nearly all current machines and C compilers. However, you can - define MALLOC_ALIGNMENT to be wider than this if necessary. - - Minimum overhead per allocated chunk: 4 or 8 bytes - Each malloced chunk has a hidden word of overhead holding size - and status information. - - Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) - 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) - - When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte - ptrs but 4 byte size) or 24 (for 8/8) additional bytes are - needed; 4 (8) for a trailing size field and 8 (16) bytes for - free list pointers. Thus, the minimum allocatable size is - 16/24/32 bytes. - - Even a request for zero bytes (i.e., malloc(0)) returns a - pointer to something of the minimum allocatable size. - - The maximum overhead wastage (i.e., number of extra bytes - allocated than were requested in malloc) is less than or equal - to the minimum size, except for requests >= mmap_threshold that - are serviced via mmap(), where the worst case wastage is 2 * - sizeof(size_t) bytes plus the remainder from a system page (the - minimal mmap unit); typically 4096 or 8192 bytes. - - Maximum allocated size: 4-byte size_t: 2^32 minus about two pages - 8-byte size_t: 2^64 minus about two pages - - It is assumed that (possibly signed) size_t values suffice to - represent chunk sizes. 
`Possibly signed' is due to the fact
- that `size_t' may be defined on a system as either a signed or
- an unsigned type. The ISO C standard says that it must be
- unsigned, but a few systems are known not to adhere to this.
- Additionally, even when size_t is unsigned, sbrk (which is by
- default used to obtain memory from system) accepts signed
- arguments, and may not be able to handle size_t-wide arguments
- with negative sign bit. Generally, values that would
- appear as negative after accounting for overhead and alignment
- are supported only via mmap(), which does not have this
- limitation.
-
- Requests for sizes outside the allowed range will perform an optional
- failure action and then return null. (Requests may also
- fail because a system is out of memory.)
-
- Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined
-
- When USE_MALLOC_LOCK is defined, wrappers are created to
- surround every public call with either a pthread mutex or
- a win32 spinlock (depending on WIN32). This is not
- especially fast, and can be a major bottleneck.
- It is designed only to provide minimal protection
- in concurrent environments, and to provide a basis for
- extensions. If you are using malloc in a concurrent program,
- you would be far better off obtaining ptmalloc, which is
- derived from a version of this malloc, and is well-tuned for
- concurrent programs. (See http://www.malloc.de) Note that
- even when USE_MALLOC_LOCK is defined, you can guarantee
- full thread-safety only if no threads acquire memory through
- direct calls to MORECORE or other system-level allocators.
-
- Compliance: I believe it is compliant with the 1997 Single Unix Specification
- (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
- others as well.
-
-* Synopsis of compile-time options:
-
- People have reported using previous versions of this malloc on all
- versions of Unix, sometimes by tweaking some of the defines
- below. It has been tested most extensively on Solaris and
- Linux. It is also reported to work on WIN32 platforms.
- People also report using it in stand-alone embedded systems.
-
- The implementation is in straight, hand-tuned ANSI C. It is not
- at all modular. (Sorry!) It uses a lot of macros. To be at all
- usable, this code should be compiled using an optimizing compiler
- (for example gcc -O3) that can simplify expressions and control
- paths. (FAQ: some macros import variables as arguments rather than
- declare locals because people reported that some debuggers
- otherwise get confused.)
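-
- As an illustration only (this command line is not from the original
- sources), a build that renames the API and enables locking might be:
-
-   cc -O3 -DUSE_DL_PREFIX -DUSE_MALLOC_LOCK -c malloc.c
-
- after which a program calls dlmalloc/dlfree rather than malloc/free.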
-
- OPTION                     DEFAULT VALUE
-
- Compilation Environment options:
-
- __STD_C                    derived from C compiler defines
- WIN32                      NOT defined
- HAVE_MEMCPY                defined
- USE_MEMCPY                 1 if HAVE_MEMCPY is defined
- HAVE_MMAP                  defined as 1
- MMAP_CLEARS                1
- HAVE_MREMAP                0 unless linux defined
- malloc_getpagesize         derived from system #includes, or 4096 if not
- HAVE_USR_INCLUDE_MALLOC_H  NOT defined
- LACKS_UNISTD_H             NOT defined unless WIN32
- LACKS_SYS_PARAM_H          NOT defined unless WIN32
- LACKS_SYS_MMAN_H           NOT defined unless WIN32
- LACKS_FCNTL_H              NOT defined
-
- Changing default word sizes:
-
- INTERNAL_SIZE_T            size_t
- MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
- PTR_UINT                   unsigned long
- CHUNK_SIZE_T               unsigned long
-
- Configuration and functionality options:
-
- USE_DL_PREFIX              NOT defined
- USE_PUBLIC_MALLOC_WRAPPERS NOT defined
- USE_MALLOC_LOCK            NOT defined
- DL_DEBUG                   NOT defined
- REALLOC_ZERO_BYTES_FREES   NOT defined
- MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
- TRIM_FASTBINS              0
- FIRST_SORTED_BIN_SIZE      512
-
- Options for customizing MORECORE:
-
- MORECORE                   sbrk
- MORECORE_CONTIGUOUS        1
- MORECORE_CANNOT_TRIM       NOT defined
- MMAP_AS_MORECORE_SIZE      (1024 * 1024)
-
- Tuning options that are also dynamically changeable via mallopt:
-
- DEFAULT_MXFAST             64
- DEFAULT_TRIM_THRESHOLD     256 * 1024
- DEFAULT_TOP_PAD            0
- DEFAULT_MMAP_THRESHOLD     256 * 1024
- DEFAULT_MMAP_MAX           65536
-
- There are several other #defined constants and macros that you
- probably don't want to touch unless you are extending or adapting malloc.
-*/
-
-/*
- WIN32 sets up defaults for MS environment and compilers.
- Otherwise defaults are for unix.
-*/
-
-/* #define WIN32 */
-
-#ifdef WIN32
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* Win32 doesn't supply or need the following headers */
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-
-/* Use the supplied emulation of sbrk */
-#define MORECORE sbrk
-#define MORECORE_CONTIGUOUS 1
-#define MORECORE_FAILURE ((void *)(-1))
-
-/* Use the supplied emulation of mmap and munmap */
-#define HAVE_MMAP 1
-#define MUNMAP_FAILURE (-1)
-#define MMAP_CLEARS 1
-
-/* These values don't really matter in windows mmap emulation */
-#define MAP_PRIVATE 1
-#define MAP_ANONYMOUS 2
-#define PROT_READ 1
-#define PROT_WRITE 2
-
-/* Emulation functions defined at the end of this file */
-
-/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */
-#ifdef USE_MALLOC_LOCK
-static int slwait(int *sl);
-static int slrelease(int *sl);
-#endif
-
-static long getpagesize(void);
-static long getregionsize(void);
-static void *sbrk(long size);
-static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg);
-static long munmap(void *ptr, long size);
-
-static void vminfo(unsigned long *free, unsigned long *reserved, unsigned long *committed);
-static int cpuinfo(int whole, unsigned long *kernel, unsigned long *user);
-
-#endif
-
-/*
- __STD_C should be nonzero if using ANSI-standard C compiler, a C++
- compiler, or a C compiler sufficiently close to ANSI to get away
- with it.
-*/
-
-#ifndef __STD_C
-#if defined(__STDC__) || defined(__cplusplus)
-#define __STD_C 1
-#else
-#define __STD_C 0
-#endif
-#endif /*__STD_C*/
-
-/*
- Void_t* is the pointer type that malloc should say it returns
-*/
-
-#ifndef Void_t
-#if (__STD_C || defined(WIN32))
-#define Void_t void
-#else
-#define Void_t char
-#endif
-#endif /*Void_t*/
-
-#if __STD_C
-#include <stddef.h> /* for size_t */
-#else
-#include <sys/types.h>
-#endif
-
-/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
-
-/* #define LACKS_UNISTD_H */
-
-#ifndef LACKS_UNISTD_H
-#include <unistd.h>
-#endif
-
-/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
-
-/* #define LACKS_SYS_PARAM_H */
-
-#include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */
-#include <stdio.h> /* needed for malloc_stats */
-
-/*
- Debugging:
-
- Because freed chunks may be overwritten with bookkeeping fields, this
- malloc will often die when freed memory is overwritten by user
- programs. This can be very effective (albeit in an annoying way)
- in helping track down dangling pointers.
-
- If you compile with -DDL_DEBUG, a number of assertion checks are
- enabled that will catch more memory errors. You probably won't be
- able to make much sense of the actual assertion errors, but they
- should help you locate incorrectly overwritten memory. The
- checking is fairly extensive, and will slow down execution
- noticeably. Calling malloc_stats or mallinfo with DL_DEBUG set will
- attempt to check every non-mmapped allocated and free chunk in the
- course of computing the summaries. (By nature, mmapped regions
- cannot be checked very much automatically.)
-
- Setting DL_DEBUG may also be helpful if you are trying to modify
- this code. The assertions in the check routines spell out in more
- detail the assumptions and invariants underlying the algorithms.
-
- Setting DL_DEBUG does NOT provide an automated mechanism for checking
- that all accesses to malloced memory stay within their
- bounds. However, there are several add-ons and adaptations of this
- or other mallocs available that do this.
-*/
-
-#include <assert.h>
-
-/*
- The unsigned integer type used for comparing any two chunk sizes.
- This should be at least as wide as size_t, but should not be signed.
-*/
-
-#ifndef CHUNK_SIZE_T
-#define CHUNK_SIZE_T unsigned long
-#endif
-
-/*
- The unsigned integer type used to hold addresses when they are
- manipulated as integers. Except that it is not defined on all
- systems, intptr_t would suffice.
-*/
-#ifndef PTR_UINT
-#define PTR_UINT unsigned long
-#endif
-
-/*
- INTERNAL_SIZE_T is the word-size used for internal bookkeeping
- of chunk sizes.
-
- The default version is the same as size_t.
-
- While not strictly necessary, it is best to define this as an
- unsigned type, even if size_t is a signed type. This may avoid some
- artificial size limitations on some systems.
-
- On a 64-bit machine, you may be able to reduce malloc overhead by
- defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
- expense of not being able to handle more than 2^32 of malloced
- space. If this limitation is acceptable, you are encouraged to set
- this unless you are on a platform requiring 16byte alignments. In
- this case the alignment requirements turn out to negate any
- potential advantages of decreasing size_t word size.
-
- Implementors: Beware of the possible combinations of:
-   - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
-     and might be the same width as int or as long
-   - size_t might have different width and signedness than INTERNAL_SIZE_T
-   - int and long might be 32 or 64 bits, and might be the same width
- To deal with this, most comparisons and difference computations
- among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
- aware of the fact that casting an unsigned int to a wider long does
- not sign-extend. (This also makes checking for negative numbers
- awkward.) Some of these casts result in harmless compiler warnings
- on some systems.
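-
- As an illustration only (a sketch, not code from this file), the
- comparison idiom described above looks like:
-
-   INTERNAL_SIZE_T oldsize = 16;
-   INTERNAL_SIZE_T newsize = 32;
-   if ((CHUNK_SIZE_T)(newsize) > (CHUNK_SIZE_T)(oldsize)) {
-     /* grow; the casts avoid signedness and width surprises */
-   }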
-*/ - -#ifndef INTERNAL_SIZE_T -#define INTERNAL_SIZE_T size_t -#endif - -/* The corresponding word size */ -#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) - -/* - MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. - It must be a power of two at least 2 * SIZE_SZ, even on machines - for which smaller alignments would suffice. It may be defined as - larger than this though. Note however that code and data structures - are optimized for the case of 8-byte alignment. -*/ - -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT (2 * SIZE_SZ) -#endif - -/* The corresponding bit mask value */ -#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) - -/* - REALLOC_ZERO_BYTES_FREES should be set if a call to - realloc with zero bytes should be the same as a call to free. - Some people think it should. Otherwise, since this malloc - returns a unique pointer for malloc(0), so does realloc(p, 0). -*/ - -/* #define REALLOC_ZERO_BYTES_FREES */ - -/* - TRIM_FASTBINS controls whether free() of a very small chunk can - immediately lead to trimming. Setting to true (1) can reduce memory - footprint, but will almost always slow down programs that use a lot - of small chunks. - - Define this only if you are willing to give up some speed to more - aggressively reduce system-level memory footprint when releasing - memory in programs that use many small chunks. You can get - essentially the same effect by setting MXFAST to 0, but this can - lead to even greater slowdowns in programs using many small chunks. - TRIM_FASTBINS is an in-between compile-time option, that disables - only those chunks bordering topmost memory from being placed in - fastbins. -*/ - -#ifndef TRIM_FASTBINS -#define TRIM_FASTBINS 0 -#endif - -/* - USE_DL_PREFIX will prefix all public routines with the string 'dl'. - This is necessary when you only want to use this malloc in one part - of a program, using your regular system malloc elsewhere. -*/ - -/* #define USE_DL_PREFIX */ - -/* - USE_MALLOC_LOCK causes wrapper functions to surround each - callable routine with pthread mutex lock/unlock. - - USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined -*/ - -/* #define USE_MALLOC_LOCK */ - -/* - If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is - actually a wrapper function that first calls MALLOC_PREACTION, then - calls the internal routine, and follows it with - MALLOC_POSTACTION. This is needed for locking, but you can also use - this, without USE_MALLOC_LOCK, for purposes of interception, - instrumentation, etc. It is a sad fact that using wrappers often - noticeably degrades performance of malloc-intensive programs. -*/ - -#ifdef USE_MALLOC_LOCK -#define USE_PUBLIC_MALLOC_WRAPPERS -#else -/* #define USE_PUBLIC_MALLOC_WRAPPERS */ -#endif - -/* - Two-phase name translation. - All of the actual routines are given mangled names. - When wrappers are used, they become the public callable versions. - When DL_PREFIX is used, the callable names are prefixed. 
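-
- For example (illustrative only): with USE_DL_PREFIX defined and
- wrappers disabled,
-
-   Void_t* p = dlmalloc(32);
-   dlfree(p);
-
- reaches the internal mALLOc and fREe routines through the macros
- defined just below.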
-*/ - -#ifndef USE_PUBLIC_MALLOC_WRAPPERS -#define cALLOc public_cALLOc -#define fREe public_fREe -#define cFREe public_cFREe -#define mALLOc public_mALLOc -#define mEMALIGn public_mEMALIGn -#define rEALLOc public_rEALLOc -#define vALLOc public_vALLOc -#define pVALLOc public_pVALLOc -#define mALLINFo public_mALLINFo -#define mALLOPt public_mALLOPt -#define mTRIm public_mTRIm -#define mSTATs public_mSTATs -#define mUSABLe public_mUSABLe -#define iCALLOc public_iCALLOc -#define iCOMALLOc public_iCOMALLOc -#endif - -#ifdef USE_DL_PREFIX -#define public_cALLOc dlcalloc -#define public_fREe dlfree -#define public_cFREe dlcfree -#define public_mALLOc dlmalloc -#define public_mEMALIGn dlmemalign -#define public_rEALLOc dlrealloc -#define public_vALLOc dlvalloc -#define public_pVALLOc dlpvalloc -#define public_mALLINFo dlmallinfo -#define public_mALLOPt dlmallopt -#define public_mTRIm dlmalloc_trim -#define public_mSTATs dlmalloc_stats -#define public_mUSABLe dlmalloc_usable_size -#define public_iCALLOc dlindependent_calloc -#define public_iCOMALLOc dlindependent_comalloc -#else /* USE_DL_PREFIX */ -#define public_cALLOc calloc -#define public_fREe free -#define public_cFREe cfree -#define public_mALLOc malloc -#define public_mEMALIGn memalign -#define public_rEALLOc realloc -#define public_vALLOc valloc -#define public_pVALLOc pvalloc -#define public_mALLINFo mallinfo -#define public_mALLOPt mallopt -#define public_mTRIm malloc_trim -#define public_mSTATs malloc_stats -#define public_mUSABLe malloc_usable_size -#define public_iCALLOc independent_calloc -#define public_iCOMALLOc independent_comalloc -#endif /* USE_DL_PREFIX */ - -/* - HAVE_MEMCPY should be defined if you are not otherwise using - ANSI STD C, but still have memcpy and memset in your C library - and want to use them in calloc and realloc. Otherwise simple - macro versions are defined below. - - USE_MEMCPY should be defined as 1 if you actually want to - have memset and memcpy called. People report that the macro - versions are faster than libc versions on some systems. - - Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks - (of <= 36 bytes) are manually unrolled in realloc and calloc. -*/ - -#define HAVE_MEMCPY - -#ifndef USE_MEMCPY -#ifdef HAVE_MEMCPY -#define USE_MEMCPY 1 -#else -#define USE_MEMCPY 0 -#endif -#endif - -#if (__STD_C || defined(HAVE_MEMCPY)) - -#ifdef WIN32 -/* On Win32 memset and memcpy are already declared in windows.h */ -#else -#if __STD_C -void *memset(void *, int, size_t); -void *memcpy(void *, const void *, size_t); -#else -Void_t *memset(); -Void_t *memcpy(); -#endif -#endif -#endif - -/* - MALLOC_FAILURE_ACTION is the action to take before "return 0" when - malloc fails to be able to return memory, either because memory is - exhausted or because of illegal arguments. - - By default, sets errno if running on STD_C platform, else does nothing. -*/ - -#ifndef MALLOC_FAILURE_ACTION -#if __STD_C -#define MALLOC_FAILURE_ACTION \ - errno = ENOMEM; - -#else -#define MALLOC_FAILURE_ACTION -#endif -#endif - -/* - MORECORE-related declarations. By default, rely on sbrk -*/ - -#ifdef LACKS_UNISTD_H -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) -#if __STD_C -extern Void_t *sbrk(ptrdiff_t); -#else -extern Void_t *sbrk(); -#endif -#endif -#endif - -/* - MORECORE is the name of the routine to call to obtain more memory - from the system. 
See below for general guidance on writing
- alternative MORECORE functions, as well as a version for WIN32 and a
- sample version for pre-OSX macos.
-*/
-
-// #define _GNU_SOURCE
-// #include <unistd.h>
-extern void *sbrk(intptr_t __delta) __THROW;
-#define MORECORE sbrk
-
-/*
- MORECORE_FAILURE is the value returned upon failure of MORECORE
- as well as mmap. Since it cannot be an otherwise valid memory address,
- and must reflect values of standard sys calls, you probably ought not
- try to redefine it.
-*/
-
-#ifndef MORECORE_FAILURE
-#define MORECORE_FAILURE (-1)
-#endif
-
-/*
- If MORECORE_CONTIGUOUS is true, take advantage of the fact that
- consecutive calls to MORECORE with positive arguments always return
- contiguous increasing addresses. This is true of unix sbrk. Even
- if not defined, when regions happen to be contiguous, malloc will
- permit allocations spanning regions obtained from different
- calls. But defining this when applicable enables some stronger
- consistency checks and space efficiencies.
-*/
-
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif
-
-/*
- Define MORECORE_CANNOT_TRIM if your version of MORECORE
- cannot release space back to the system when given negative
- arguments. This is generally necessary only if you are using
- a hand-crafted MORECORE function that cannot handle negative arguments.
-*/
-
-/* #define MORECORE_CANNOT_TRIM */
-
-/*
- Define HAVE_MMAP as true to optionally make malloc() use mmap() to
- allocate very large blocks. These will be returned to the
- operating system immediately after a free(). Also, if mmap
- is available, it is used as a backup strategy in cases where
- MORECORE fails to provide space from system.
-
- This malloc is best tuned to work with mmap for large requests.
- If you do not have mmap, operations involving very large chunks (1MB
- or so) may be slower than you'd like.
-*/
-
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif
-
-#if HAVE_MMAP
-/*
- Standard unix mmap using /dev/zero clears memory so calloc doesn't
- need to.
-*/
-
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif
-
-#else /* no mmap */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 0
-#endif
-#endif
-
-/*
- MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
- sbrk fails, and mmap is used as a backup (which is done only if
- HAVE_MMAP). The value must be a multiple of page size. This
- backup strategy generally applies only when systems have "holes" in
- address space, so sbrk cannot perform contiguous expansion, but
- there is still space available on system. On systems for which
- this is known to be useful (i.e. most linux kernels), this occurs
- only when programs allocate huge amounts of memory. Between this,
- and the fact that mmap regions tend to be limited, the size should
- be large, to avoid too many mmap calls and thus avoid running out
- of kernel resources.
-*/
-
-#ifndef MMAP_AS_MORECORE_SIZE
-#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
-#endif
-
-/*
- Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
- large blocks. This is currently only possible on Linux with
- kernel versions newer than 1.3.77.
-*/
-
-#ifndef HAVE_MREMAP
-#if defined(linux) && defined(__USE_GNU)
-#define HAVE_MREMAP 1
-#else
-#define HAVE_MREMAP 0
-#endif
-
-#endif /* HAVE_MREMAP */
-
-/*
- The system page size. To the extent possible, this malloc manages
- memory from the system in page-size units. Note that this value is
- cached during initialization into a field of malloc_state.
- So even if malloc_getpagesize is a function, it is only called once.
-
- The following mechanics for getpagesize were adapted from bsd/gnu
- getpagesize.h. If none of the system-probes here apply, a value of
- 4096 is used, which should be OK: If they don't apply, then using
- the actual value probably doesn't impact performance.
-*/
-
-#ifndef malloc_getpagesize
-
-#ifndef LACKS_UNISTD_H
-#include <unistd.h>
-#endif
-
-#ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
-#ifndef _SC_PAGE_SIZE
-#define _SC_PAGE_SIZE _SC_PAGESIZE
-#endif
-#endif
-
-#ifdef _SC_PAGE_SIZE
-#define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-#else
-#if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
-extern size_t getpagesize();
-#define malloc_getpagesize getpagesize()
-#else
-#ifdef WIN32 /* use supplied emulation of getpagesize */
-#define malloc_getpagesize getpagesize()
-#else
-#ifndef LACKS_SYS_PARAM_H
-#include <sys/param.h>
-#endif
-#ifdef EXEC_PAGESIZE
-#define malloc_getpagesize EXEC_PAGESIZE
-#else
-#ifdef NBPG
-#ifndef CLSIZE
-#define malloc_getpagesize NBPG
-#else
-#define malloc_getpagesize (NBPG * CLSIZE)
-#endif
-#else
-#ifdef NBPC
-#define malloc_getpagesize NBPC
-#else
-#ifdef PAGESIZE
-#define malloc_getpagesize PAGESIZE
-#else /* just guess */
-#define malloc_getpagesize (4096)
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-
-/*
- This version of malloc supports the standard SVID/XPG mallinfo
- routine that returns a struct containing usage properties and
- statistics. It should work on any SVID/XPG compliant system that has
- a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
- install such a thing yourself, cut out the preliminary declarations
- as described above and below and save them in a malloc.h file. But
- there's no compelling reason to bother to do this.)
-
- The main declaration needed is the mallinfo struct that is returned
- (by-copy) by mallinfo(). The SVID/XPG mallinfo struct contains a
- bunch of fields that are not even meaningful in this version of
- malloc. These fields are instead filled by mallinfo() with
- other numbers that might be of interest.
-
- HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
- /usr/include/malloc.h file that includes a declaration of struct
- mallinfo. If so, it is included; else an SVID2/XPG2 compliant
- version is declared below. These must be precisely the same for
- mallinfo() to work. The original SVID version of this struct,
- defined on most systems with mallinfo, declares all fields as
- ints. But some others define them as unsigned long. If your system
- defines the fields using a type of different width than listed here,
- you must #include your system version and #define
- HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
-#include "/usr/include/malloc.h"
-#else
-
-/* SVID2/XPG mallinfo structure */
-
-/*
- SVID/XPG defines four standard parameter numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports other
- options in mallopt described below.
-*/
-#endif
-
-/* ---------- description of public routines ------------ */
-
-/*
- malloc(size_t n)
- Returns a pointer to a newly allocated chunk of at least n bytes, or null
- if no space is available. Additionally, on failure, errno is
- set to ENOMEM on ANSI C systems.
-
- If n is zero, malloc returns a minimum-sized chunk.
- (The minimum size is 16 bytes on most 32bit systems, and 24 or 32
- bytes on 64bit systems.) On most systems, size_t is an unsigned type,
- so calls with negative arguments are interpreted as requests for huge
- amounts of space, which will often fail. The maximum supported value
- of n differs across systems, but is in all cases less than the
- maximum representable value of a size_t.
-*/
-#if __STD_C
-Void_t *public_mALLOc(size_t);
-#else
-Void_t *public_mALLOc();
-#endif
-
-/*
- free(Void_t* p)
- Releases the chunk of memory pointed to by p, that had been previously
- allocated using malloc or a related routine such as realloc.
- It has no effect if p is null. It can have arbitrary (i.e., bad!)
- effects if p has already been freed.
-
- Unless disabled (using mallopt), freeing very large spaces will,
- when possible, automatically trigger operations that give
- back unused memory to the system, thus reducing program footprint.
-*/
-#if __STD_C
-void public_fREe(Void_t *);
-#else
-void public_fREe();
-#endif
-
-/*
- calloc(size_t n_elements, size_t element_size);
- Returns a pointer to n_elements * element_size bytes, with all locations
- set to zero.
-*/
-#if __STD_C
-Void_t *public_cALLOc(size_t, size_t);
-#else
-Void_t *public_cALLOc();
-#endif
-
-/*
- realloc(Void_t* p, size_t n)
- Returns a pointer to a chunk of size n that contains the same data
- as does chunk p up to the minimum of (n, p's size) bytes, or null
- if no space is available.
-
- The returned pointer may or may not be the same as p. The algorithm
- prefers extending p when possible, otherwise it employs the
- equivalent of a malloc-copy-free sequence.
-
- If p is null, realloc is equivalent to malloc.
-
- If space is not available, realloc returns null, errno is set (if on
- ANSI) and p is NOT freed.
-
- If n is for fewer bytes than already held by p, the newly unused
- space is lopped off and freed if possible. Unless the #define
- REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
- zero (re)allocates a minimum-sized chunk.
-
- Large chunks that were internally obtained via mmap will always
- be reallocated using malloc-copy-free sequences unless
- the system supports MREMAP (currently only linux).
-
- The old unix realloc convention of allowing the last-free'd chunk
- to be used as an argument to realloc is not supported.
-*/
-#if __STD_C
-Void_t *public_rEALLOc(Void_t *, size_t);
-#else
-Void_t *public_rEALLOc();
-#endif
-
-/*
- memalign(size_t alignment, size_t n);
- Returns a pointer to a newly allocated chunk of n bytes, aligned
- in accord with the alignment argument.
-
- The alignment argument should be a power of two. If the argument is
- not a power of two, the nearest greater power is used.
- 8-byte alignment is guaranteed by normal malloc calls, so don't
- bother calling memalign with an argument of 8 or less.
-
- Overreliance on memalign is a sure way to fragment space.
-*/
-#if __STD_C
-Void_t *public_mEMALIGn(size_t, size_t);
-#else
-Void_t *public_mEMALIGn();
-#endif
-
-/*
- valloc(size_t n);
- Equivalent to memalign(pagesize, n), where pagesize is the page
- size of the system. If the pagesize is unknown, 4096 is used.
-*/
-#if __STD_C
-Void_t *public_vALLOc(size_t);
-#else
-Void_t *public_vALLOc();
-#endif
-
-/*
- mallopt(int parameter_number, int parameter_value)
- Sets tunable parameters. The format is to provide a
- (parameter-number, parameter-value) pair. mallopt then sets the
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. SVID/XPG/ANSI defines four standard param numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports four
- other options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol            param #  default    allowed param values
- M_MXFAST          1        64         0-80 (0 disables fastbins)
- M_TRIM_THRESHOLD  -1       256*1024   any (-1U disables trimming)
- M_TOP_PAD         -2       0          any
- M_MMAP_THRESHOLD  -3       256*1024   any (or 0 if no MMAP support)
- M_MMAP_MAX        -4       65536      any (0 disables use of mmap)
-*/
-#if __STD_C
-int public_mALLOPt(int, int);
-#else
-int public_mALLOPt();
-#endif
-
-/*
- mallinfo()
- Returns (by copy) a struct containing various summary statistics:
-
- arena:     current total non-mmapped bytes allocated from system
- ordblks:   the number of free chunks
- smblks:    the number of fastbin blocks (i.e., small chunks that
-            have been freed but not yet reused or consolidated)
- hblks:     current number of mmapped regions
- hblkhd:    total bytes held in mmapped regions
- usmblks:   the maximum total allocated space. This will be greater
-            than current total if trimming has occurred.
- fsmblks:   total bytes held in fastbin blocks
- uordblks:  current total allocated space (normal or mmapped)
- fordblks:  total free space
- keepcost:  the maximum number of bytes that could ideally be released
-            back to system via malloc_trim. ("ideally" means that
-            it ignores page restrictions etc.)
-
- Because these fields are ints, but internal bookkeeping may
- be kept as longs, the reported values may wrap around zero and
- thus be inaccurate.
-*/
-#if __STD_C
-struct mallinfo public_mALLINFo(void);
-#else
-struct mallinfo public_mALLINFo();
-#endif
-
-/*
- independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
-
- independent_calloc is similar to calloc, but instead of returning a
- single cleared space, it returns an array of pointers to n_elements
- independent elements that can hold contents of size elem_size, each
- of which starts out cleared, and can be independently freed,
- realloc'ed etc. The elements are guaranteed to be adjacently
- allocated (this is not guaranteed to occur with multiple callocs or
- mallocs), which may also improve cache locality in some
- applications.
-
- The "chunks" argument is optional (i.e., may be null, which is
- probably the most typical usage). If it is null, the returned array
- is itself dynamically allocated and should also be freed when it is
- no longer needed. Otherwise, the chunks array must be of at least
- n_elements in length. It is filled in with the pointers to the
- chunks.
-
- In either case, independent_calloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and "chunks"
- is null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use regular calloc and assign pointers into this
- space to represent elements. (In this case though, you cannot
- independently free elements.)
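-
- For contrast, a sketch (illustrative only, not code from this file;
- struct Elem and n stand in for any element type and count) of the
- free-all-at-once alternative described above:
-
-   /* one calloc'ed block of n elements; none individually freeable */
-   struct Elem* pool = (struct Elem*) calloc(n, sizeof(struct Elem));
-   if (pool == 0) die();
-   /* ... use pool[0] through pool[n-1] as the elements ... */
-   free(pool); /* releases every element at once */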
-
- independent_calloc simplifies and speeds up implementations of many
- kinds of pools. It may also be useful when constructing large data
- structures that initially have a fixed number of fixed-sized nodes,
- but the number is not known at compile time, and some of the nodes
- may later need to be freed. For example:
-
- struct Node { int item; struct Node* next; };
-
- struct Node* build_list() {
-   struct Node** pool;
-   int n = read_number_of_nodes_needed();
-   int i;
-   if (n <= 0) return 0;
-   pool = (struct Node**) independent_calloc(n, sizeof(struct Node), 0);
-   if (pool == 0) die();
-   // organize into a linked list...
-   struct Node* first = pool[0];
-   for (i = 0; i < n-1; ++i)
-     pool[i]->next = pool[i+1];
-   free(pool); // Can now free the array (or not, if it is needed later)
-   return first;
- }
-*/
-#if __STD_C
-Void_t **public_iCALLOc(size_t, size_t, Void_t **);
-#else
-Void_t **public_iCALLOc();
-#endif
-
-/*
- independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
-
- independent_comalloc allocates, all at once, a set of n_elements
- chunks with sizes indicated in the "sizes" array. It returns
- an array of pointers to these elements, each of which can be
- independently freed, realloc'ed etc. The elements are guaranteed to
- be adjacently allocated (this is not guaranteed to occur with
- multiple callocs or mallocs), which may also improve cache locality
- in some applications.
-
- The "chunks" argument is optional (i.e., may be null). If it is null
- the returned array is itself dynamically allocated and should also
- be freed when it is no longer needed. Otherwise, the chunks array
- must be of at least n_elements in length. It is filled in with the
- pointers to the chunks.
-
- In either case, independent_comalloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and chunks is
- null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use a single regular malloc, and assign pointers at
- particular offsets in the aggregate space. (In this case though, you
- cannot independently free elements.)
-
- independent_comalloc differs from independent_calloc in that each
- element may have a different size, and also that it does not
- automatically clear elements.
-
- independent_comalloc can be used to speed up allocation in cases
- where several structs or objects must always be allocated at the
- same time. For example:
-
- struct Head { ... };
- struct Foot { ... };
-
- void send_message(char* msg) {
-   int msglen = strlen(msg);
-   size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
-   void* chunks[3];
-   if (independent_comalloc(3, sizes, chunks) == 0)
-     die();
-   struct Head* head = (struct Head*)(chunks[0]);
-   char* body = (char*)(chunks[1]);
-   struct Foot* foot = (struct Foot*)(chunks[2]);
-   // ...
- }
-
- In general though, independent_comalloc is worth using only for
- larger values of n_elements. For small values, you probably won't
- detect enough difference from series of malloc calls to bother.
-
- Overuse of independent_comalloc can increase overall memory usage,
- since it cannot reuse existing noncontiguous small chunks that
- might be available for some of the elements.
-
-*/
-#if __STD_C
-Void_t **public_iCOMALLOc(size_t, size_t *, Void_t **);
-#else
-Void_t **public_iCOMALLOc();
-#endif
-
-/*
- pvalloc(size_t n);
- Equivalent to valloc(minimum-page-that-holds(n)), that is,
- round up n to nearest pagesize.
- */
-#if __STD_C
-Void_t *public_pVALLOc(size_t);
-#else
-Void_t *public_pVALLOc();
-#endif
-
-/*
- cfree(Void_t* p);
- Equivalent to free(p).
-
- cfree is needed/defined on some systems that pair it with calloc,
- for odd historical reasons (such as: cfree is used in example
- code in the first edition of K&R).
-*/
-#if __STD_C
-void public_cFREe(Void_t *);
-#else
-void public_cFREe();
-#endif
-
-/*
- malloc_trim(size_t pad);
-
- If possible, gives memory back to the system (via negative
- arguments to sbrk) if there is unused memory at the `high' end of
- the malloc pool. You can call this after freeing large blocks of
- memory to potentially reduce the system-level memory requirements
- of a program. However, it cannot guarantee to reduce memory. Under
- some allocation patterns, some large free blocks of memory will be
- locked between two used chunks, so they cannot be given back to
- the system.
-
- The `pad' argument to malloc_trim represents the amount of free
- trailing space to leave untrimmed. If this argument is zero,
- only the minimum amount of memory to maintain internal data
- structures will be left (one page or less). Non-zero arguments
- can be supplied to maintain enough trailing space to service
- future expected allocations without having to re-obtain memory
- from the system.
-
- Malloc_trim returns 1 if it actually released any memory, else 0.
- On systems that do not support "negative sbrks", it will always
- return 0.
-*/
-#if __STD_C
-int public_mTRIm(size_t);
-#else
-int public_mTRIm();
-#endif
-
-/*
- malloc_usable_size(Void_t* p);
-
- Returns the number of bytes you can actually use in
- an allocated chunk, which may be more than you requested (although
- often not) due to alignment and minimum size constraints.
- You can use this many bytes without worrying about
- overwriting other allocated objects. This is not a particularly great
- programming practice. malloc_usable_size can be more useful in
- debugging and assertions, for example:
-
-   p = malloc(n);
-   assert(malloc_usable_size(p) >= 256);
-
-*/
-#if __STD_C
-size_t public_mUSABLe(Void_t *);
-#else
-size_t public_mUSABLe();
-#endif
-
-/*
- malloc_stats();
- Prints on stderr the amount of space obtained from the system (both
- via sbrk and mmap), the maximum amount (which may be more than
- current if malloc_trim and/or munmap got called), and the current
- number of bytes allocated via malloc (or realloc, etc) but not yet
- freed. Note that this is the number of bytes allocated, not the
- number requested. It will be larger than the number requested
- because of alignment and bookkeeping overhead. Because it includes
- alignment wastage as being in use, this figure may be greater than
- zero even when no user-level chunks are allocated.
-
- The reported current and maximum system memory can be inaccurate if
- a program makes other calls to system memory allocation functions
- (normally sbrk) outside of malloc.
-
- malloc_stats prints only the most commonly interesting statistics.
- More information can be obtained by calling mallinfo.
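-
- For example (a sketch only; the field names are those of the
- mallinfo struct described above):
-
-   malloc_stats(); /* prints the summary on stderr */
-   struct mallinfo mi = mallinfo();
-   fprintf(stderr, "allocated: %d free: %d\n", mi.uordblks, mi.fordblks);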
-
-*/
-#if __STD_C
-void public_mSTATs(void);
-#else
-void public_mSTATs();
-#endif
-
-/* mallopt tuning options */
-
-/*
- M_MXFAST is the maximum request size used for "fastbins", special bins
- that hold returned chunks without consolidating their spaces. This
- enables future requests for chunks of the same size to be handled
- very quickly, but can increase fragmentation, and thus increase the
- overall memory footprint of a program.
-
- This malloc manages fastbins very conservatively yet still
- efficiently, so fragmentation is rarely a problem for values less
- than or equal to the default. The maximum supported value of MXFAST
- is 80. You wouldn't want it any higher than this anyway. Fastbins
- are designed especially for use with many small structs, objects or
- strings -- the default handles structs/objects/arrays with sizes up
- to 16 4byte fields, or small strings representing words, tokens,
- etc. Using fastbins for larger objects normally worsens
- fragmentation without improving speed.
-
- M_MXFAST is set in REQUEST size units. It is internally used in
- chunksize units, which adds padding and alignment. You can reduce
- M_MXFAST to 0 to disable all use of fastbins. This causes the malloc
- algorithm to be a closer approximation of fifo-best-fit in all cases,
- not just for larger requests, but will generally cause it to be
- slower.
-*/
-
-/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
-#ifndef M_MXFAST
-#define M_MXFAST 1
-#endif
-
-#ifndef DEFAULT_MXFAST
-#define DEFAULT_MXFAST 64
-#endif
-
-/*
- M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
- to keep before releasing via malloc_trim in free().
-
- Automatic trimming is mainly useful in long-lived programs.
- Because trimming via sbrk can be slow on some systems, and can
- sometimes be wasteful (in cases where programs immediately
- afterward allocate more large chunks) the value should be high
- enough so that your overall system performance would improve by
- releasing this much memory.
-
- The trim threshold and the mmap control parameters (see below)
- can be traded off with one another. Trimming and mmapping are
- two different ways of releasing unused memory back to the
- system. Between these two, it is often possible to keep
- system-level demands of a long-lived program down to a bare
- minimum. For example, in one test suite of sessions measuring
- the XF86 X server on Linux, using a trim threshold of 128K and a
- mmap threshold of 192K led to near-minimal long term resource
- consumption.
-
- If you are using this malloc in a long-lived program, it should
- pay to experiment with these values. As a rough guide, you
- might set to a value close to the average size of a process
- (program) running on your system. Releasing this much memory
- would allow such a process to run in memory. Generally, it's
- worth it to tune for trimming rather than memory mapping when a
- program undergoes phases where several large chunks are
- allocated and released in ways that can reuse each other's
- storage, perhaps mixed with phases where there are no such
- chunks at all. And in well-behaved long-lived programs,
- controlling release of large blocks via trimming versus mapping
- is usually faster.
-
- However, in most programs, these parameters serve mainly as
- protection against the system-level effects of carrying around
- massive amounts of unneeded memory.
Since frequent calls to - sbrk, mmap, and munmap otherwise degrade performance, the default - parameters are set to relatively high values that serve only as - safeguards. - - The trim value must be greater than page size to have any useful - effect. To disable trimming completely, you can set to - (unsigned long)(-1) - - Trim settings interact with fastbin (MXFAST) settings: Unless - TRIM_FASTBINS is defined, automatic trimming never takes place upon - freeing a chunk with size less than or equal to MXFAST. Trimming is - instead delayed until subsequent freeing of larger chunks. However, - you can still force an attempted trim by calling malloc_trim. - - Also, trimming is not generally possible in cases where - the main arena is obtained via mmap. - - Note that the trick some people use of mallocing a huge space and - then freeing it at program startup, in an attempt to reserve system - memory, doesn't have the intended effect under automatic trimming, - since that memory will immediately be returned to the system. -*/ - -#define M_TRIM_THRESHOLD -1 - -#ifndef DEFAULT_TRIM_THRESHOLD -#define DEFAULT_TRIM_THRESHOLD (256 * 1024) -#endif - -/* - M_TOP_PAD is the amount of extra `padding' space to allocate or - retain whenever sbrk is called. It is used in two ways internally: - - * When sbrk is called to extend the top of the arena to satisfy - a new malloc request, this much padding is added to the sbrk - request. - - * When malloc_trim is called automatically from free(), - it is used as the `pad' argument. - - In both cases, the actual amount of padding is rounded - so that the end of the arena is always a system page boundary. - - The main reason for using padding is to avoid calling sbrk so - often. Having even a small pad greatly reduces the likelihood - that nearly every malloc request during program start-up (or - after trimming) will invoke sbrk, which needlessly wastes - time. - - Automatic rounding-up to page-size units is normally sufficient - to avoid measurable overhead, so the default is 0. However, in - systems where sbrk is relatively slow, it can pay to increase - this value, at the expense of carrying around more memory than - the program needs. -*/ - -#define M_TOP_PAD -2 - -#ifndef DEFAULT_TOP_PAD -#define DEFAULT_TOP_PAD (0) -#endif - -/* - M_MMAP_THRESHOLD is the request size threshold for using mmap() - to service a request. Requests of at least this size that cannot - be allocated using already-existing space will be serviced via mmap. - (If enough normal freed space already exists it is used instead.) - - Using mmap segregates relatively large chunks of memory so that - they can be individually obtained and released from the host - system. A request serviced through mmap is never reused by any - other request (at least not directly; the system may just so - happen to remap successive requests to the same locations). - - Segregating space in this way has the benefits that: - - 1. Mmapped space can ALWAYS be individually released back - to the system, which helps keep the system level memory - demands of a long-lived program low. - 2. Mapped memory can never become `locked' between - other chunks, as can happen with normally allocated chunks, which - means that even trimming via malloc_trim would not release them. - 3. On some systems with "holes" in address spaces, mmap can obtain - memory that sbrk cannot. - - However, it has the disadvantages that: - - 1. 
-     The space cannot be reclaimed, consolidated, and then
-     used to service later requests, as happens with normal chunks.
-  2. It can lead to more wastage because of mmap page alignment
-     requirements
-  3. It causes malloc performance to be more dependent on host
-     system memory management support routines which may vary in
-     implementation quality and may impose arbitrary
-     limitations. Generally, servicing a request via normal
-     malloc steps is faster than going through a system's mmap.
-
-  The advantages of mmap nearly always outweigh disadvantages for
-  "large" chunks, but the value of "large" varies across systems. The
-  default is an empirically derived value that works well in most
-  systems.
-*/
-
-#define M_MMAP_THRESHOLD -3
-
-#ifndef DEFAULT_MMAP_THRESHOLD
-#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
-#endif
-
-/*
-  M_MMAP_MAX is the maximum number of requests to simultaneously
-  service using mmap. This parameter exists because some systems
-  have a limited number of internal tables for use by mmap, and
-  using more than a few of them may degrade performance.
-
-  The default is set to a value that serves only as a safeguard.
-  Setting to 0 disables use of mmap for servicing large requests. If
-  HAVE_MMAP is not set, the default value is 0, and attempts to set it
-  to non-zero values in mallopt will fail.
-*/
-
-#define M_MMAP_MAX -4
-
-#ifndef DEFAULT_MMAP_MAX
-#if HAVE_MMAP
-#define DEFAULT_MMAP_MAX (65536)
-#else
-#define DEFAULT_MMAP_MAX (0)
-#endif
-#endif
-
-/*
-  ========================================================================
-  To make a fully customizable malloc.h header file, cut everything
-  above this line, put into file malloc.h, edit to suit, and #include it
-  on the next line, as well as in programs that use this malloc.
-  ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/* --------------------- public wrappers ---------------------- */
-
-#ifdef USE_PUBLIC_MALLOC_WRAPPERS
-
-/* Declare all routines as internal */
-#if __STD_C
-static Void_t *mALLOc(size_t);
-static void fREe(Void_t *);
-static Void_t *rEALLOc(Void_t *, size_t);
-static Void_t *mEMALIGn(size_t, size_t);
-static Void_t *vALLOc(size_t);
-static Void_t *pVALLOc(size_t);
-static Void_t *cALLOc(size_t, size_t);
-static Void_t **iCALLOc(size_t, size_t, Void_t **);
-static Void_t **iCOMALLOc(size_t, size_t *, Void_t **);
-static void cFREe(Void_t *);
-static int mTRIm(size_t);
-static size_t mUSABLe(Void_t *);
-static void mSTATs(void);
-static int mALLOPt(int, int);
-static struct mallinfo mALLINFo(void);
-#else
-static Void_t *mALLOc();
-static void fREe();
-static Void_t *rEALLOc();
-static Void_t *mEMALIGn();
-static Void_t *vALLOc();
-static Void_t *pVALLOc();
-static Void_t *cALLOc();
-static Void_t **iCALLOc();
-static Void_t **iCOMALLOc();
-static void cFREe();
-static int mTRIm();
-static size_t mUSABLe();
-static void mSTATs();
-static int mALLOPt();
-static struct mallinfo mALLINFo();
-#endif
-
-/*
-  MALLOC_PREACTION and MALLOC_POSTACTION should be
-  defined to return 0 on success, and nonzero on failure.
-  The return value of MALLOC_POSTACTION is currently ignored
-  in wrapper functions since there is no reasonable default
-  action to take on failure.
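-
-  For instance, a program could supply its own actions before this
-  file is compiled; a minimal sketch, where foo_lock/foo_unlock stand
-  in for whatever synchronization the host program already has:
-
-      #define MALLOC_PREACTION (foo_lock(), 0)
-      #define MALLOC_POSTACTION (foo_unlock(), 0)
-
-  Both expand to expressions yielding 0, meaning success.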
-*/
-
-#ifdef USE_MALLOC_LOCK
-
-#ifdef WIN32
-
-static int mALLOC_MUTEx;
-#define MALLOC_PREACTION slwait(&mALLOC_MUTEx)
-#define MALLOC_POSTACTION slrelease(&mALLOC_MUTEx)
-
-#else
-
-#include <pthread.h>
-
-static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER;
-
-#define MALLOC_PREACTION pthread_mutex_lock(&mALLOC_MUTEx)
-#define MALLOC_POSTACTION pthread_mutex_unlock(&mALLOC_MUTEx)
-
-#endif /* WIN32 */
-
-#else
-
-/* Substitute anything you like for these */
-
-#define MALLOC_PREACTION (0)
-#define MALLOC_POSTACTION (0)
-
-#endif
-
-Void_t *public_mALLOc(size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = mALLOc(bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-void public_fREe(Void_t *m) {
-    if (MALLOC_PREACTION != 0) {
-        return;
-    }
-    fREe(m);
-    if (MALLOC_POSTACTION != 0) {
-    }
-}
-
-Void_t *public_rEALLOc(Void_t *m, size_t bytes) {
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = rEALLOc(m, bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_mEMALIGn(size_t alignment, size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = mEMALIGn(alignment, bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_vALLOc(size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = vALLOc(bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_pVALLOc(size_t bytes) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = pVALLOc(bytes);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t *public_cALLOc(size_t n, size_t elem_size) {
-    Void_t *m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = cALLOc(n, elem_size);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t **public_iCALLOc(size_t n, size_t elem_size, Void_t **chunks) {
-    Void_t **m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = iCALLOc(n, elem_size, chunks);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-Void_t **public_iCOMALLOc(size_t n, size_t sizes[], Void_t **chunks) {
-    Void_t **m;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    m = iCOMALLOc(n, sizes, chunks);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-void public_cFREe(Void_t *m) {
-    if (MALLOC_PREACTION != 0) {
-        return;
-    }
-    cFREe(m);
-    if (MALLOC_POSTACTION != 0) {
-    }
-}
-
-int public_mTRIm(size_t s) {
-    int result;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    result = mTRIm(s);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return result;
-}
-
-size_t public_mUSABLe(Void_t *m) {
-    size_t result;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    result = mUSABLe(m);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return result;
-}
-
-void public_mSTATs() {
-    if (MALLOC_PREACTION != 0) {
-        return;
-    }
-    mSTATs();
-    if (MALLOC_POSTACTION != 0) {
-    }
-}
-
-struct mallinfo public_mALLINFo() {
-    struct mallinfo m;
-    if (MALLOC_PREACTION != 0) {
-        struct mallinfo nm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-        return nm;
-    }
-    m = mALLINFo();
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return m;
-}
-
-int public_mALLOPt(int p, int v) {
-    int result;
-    if (MALLOC_PREACTION != 0) {
-        return 0;
-    }
-    result = mALLOPt(p, v);
-    if (MALLOC_POSTACTION != 0) {
-    }
-    return result;
-}
-
-#endif
-
-/* ------------- Optional versions of memcopy ---------------- */
-
-#if USE_MEMCPY
-
-/*
-  Note: memcpy is ONLY invoked with non-overlapping regions,
-  so the (usually slower) memmove is not needed.
-*/
-
-#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes)
-#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes)
-
-#else /* !USE_MEMCPY */
-
-/* Use Duff's device for good zeroing/copying performance. */
-
-#define MALLOC_ZERO(charp, nbytes) \
-    do { \
-        INTERNAL_SIZE_T *mzp = (INTERNAL_SIZE_T *)(charp); \
-        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
-        long mcn; \
-        if (mctmp < 8) \
-            mcn = 0; \
-        else { \
-            mcn = (mctmp - 1) / 8; \
-            mctmp %= 8; \
-        } \
-        switch (mctmp) { \
-        case 0: \
-            for (;;) { \
-                *mzp++ = 0; \
-            case 7: \
-                *mzp++ = 0; \
-            case 6: \
-                *mzp++ = 0; \
-            case 5: \
-                *mzp++ = 0; \
-            case 4: \
-                *mzp++ = 0; \
-            case 3: \
-                *mzp++ = 0; \
-            case 2: \
-                *mzp++ = 0; \
-            case 1: \
-                *mzp++ = 0; \
-                if (mcn <= 0) \
-                    break; \
-                mcn--; \
-            } \
-        } \
-    } while (0)
-
-#define MALLOC_COPY(dest, src, nbytes) \
-    do { \
-        INTERNAL_SIZE_T *mcsrc = (INTERNAL_SIZE_T *)src; \
-        INTERNAL_SIZE_T *mcdst = (INTERNAL_SIZE_T *)dest; \
-        CHUNK_SIZE_T mctmp = (nbytes) / sizeof(INTERNAL_SIZE_T); \
-        long mcn; \
-        if (mctmp < 8) \
-            mcn = 0; \
-        else { \
-            mcn = (mctmp - 1) / 8; \
-            mctmp %= 8; \
-        } \
-        switch (mctmp) { \
-        case 0: \
-            for (;;) { \
-                *mcdst++ = *mcsrc++; \
-            case 7: \
-                *mcdst++ = *mcsrc++; \
-            case 6: \
-                *mcdst++ = *mcsrc++; \
-            case 5: \
-                *mcdst++ = *mcsrc++; \
-            case 4: \
-                *mcdst++ = *mcsrc++; \
-            case 3: \
-                *mcdst++ = *mcsrc++; \
-            case 2: \
-                *mcdst++ = *mcsrc++; \
-            case 1: \
-                *mcdst++ = *mcsrc++; \
-                if (mcn <= 0) \
-                    break; \
-                mcn--; \
-            } \
-        } \
-    } while (0)
-
-#endif
-
-/* ------------------ MMAP support ------------------ */
-
-#if HAVE_MMAP
-
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif
-
-#ifndef LACKS_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-/*
-  Nearly all versions of mmap support MAP_ANONYMOUS,
-  so the following is unlikely to be needed, but is
-  supplied just in case.
-*/
-
-#ifndef MAP_ANONYMOUS
-
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-
-#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? (dev_zero_fd = open("/dev/zero", O_RDWR), \
-                                                            mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) \
-                                                         : mmap((addr), (size), (prot), (flags), dev_zero_fd, 0))
-
-#else
-
-#define MMAP(addr, size, prot, flags) \
-    (mmap((addr), (size), (prot), (flags) | MAP_ANONYMOUS, -1, 0))
-
-#endif
-
-#endif /* HAVE_MMAP */
-
-/*
-  ----------------------- Chunk representations -----------------------
-*/
-
-/*
-  This struct declaration is misleading (but accurate and necessary).
-  It declares a "view" into memory allowing access to necessary
-  fields at known offsets from a given base. See explanation below.
-*/
-
-struct malloc_chunk {
-
-    INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */
-    INTERNAL_SIZE_T size;      /* Size in bytes, including overhead. */
-
-    struct malloc_chunk *fd; /* double links -- used only if free. */
-    struct malloc_chunk *bk;
-};
-
-typedef struct malloc_chunk *mchunkptr;
-
-/*
-  malloc_chunk details:
-
-  (The following includes lightly edited explanations by Colin Plumb.)
-
-  Chunks of memory are maintained using a `boundary tag' method as
-  described in e.g., Knuth or Standish. (See the paper by Paul
-  Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
-  survey of such techniques.) Sizes of free chunks are stored both
-  in the front of each chunk and at the end. This makes
-  consolidating fragmented chunks into bigger chunks very fast.
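-
-  (A sketch of why consolidation is fast: from any chunk p, both
-  physical neighbors are reachable in O(1), with no searching --
-
-      next = (mchunkptr)((char *)p + chunksize(p));
-      prev = (mchunkptr)((char *)p - p->prev_size);
-
-  where the second line is legal only when the previous chunk is
-  free, which is exactly the case in which merging is possible.)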
-  The size fields also hold bits representing whether chunks are free
-  or in use.
-
-  An allocated chunk looks like this:
-
-
-  chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of previous chunk, if allocated                      | |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of chunk, in bytes                                   |P|
-    mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | User data starts here...                                    .
-          .                                                             .
-          .             (malloc_usable_space() bytes)                   .
-          .                                                             |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of chunk                                               |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-  Where "chunk" is the front of the chunk for the purpose of most of
-  the malloc code, but "mem" is the pointer that is returned to the
-  user. "Nextchunk" is the beginning of the next contiguous chunk.
-
-  Chunks always begin on even word boundaries, so the mem portion
-  (which is returned to the user) is also on an even word boundary, and
-  thus at least double-word aligned.
-
-  Free chunks are stored in circular doubly-linked lists, and look like this:
-
-  chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Size of previous chunk                                      |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  `head:' | Size of chunk, in bytes                                   |P|
-    mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Forward pointer to next chunk in list                       |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Back pointer to previous chunk in list                      |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-          | Unused space (may be 0 bytes long)                          .
-          .                                                             .
-          .                                                             |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  `foot:' | Size of chunk, in bytes                                     |
-          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-  The P (PREV_INUSE) bit, stored in the unused low-order bit of the
-  chunk size (which is always a multiple of two words), is an in-use
-  bit for the *previous* chunk. If that bit is *clear*, then the
-  word before the current chunk size contains the previous chunk
-  size, and can be used to find the front of the previous chunk.
-  The very first chunk allocated always has this bit set,
-  preventing access to non-existent (or non-owned) memory. If
-  prev_inuse is set for any given chunk, then you CANNOT determine
-  the size of the previous chunk, and might even get a memory
-  addressing fault when trying to do so.
-
-  Note that the `foot' of the current chunk is actually represented
-  as the prev_size of the NEXT chunk. This makes it easier to
-  deal with alignments etc but can be very confusing when trying
-  to extend or adapt this code.
-
-  The two exceptions to all this are
-
-  1. The special chunk `top' doesn't bother using the
-     trailing size field since there is no next contiguous chunk
-     that would have to index off it. After initialization, `top'
-     is forced to always exist. If it would become less than
-     MINSIZE bytes long, it is replenished.
-
-  2. Chunks allocated via mmap, which have the second-lowest-order
-     bit (IS_MMAPPED) set in their size fields. Because they are
-     allocated one-by-one, each must contain its own trailing size field.
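-
-  As a concrete sketch (assuming the default INTERNAL_SIZE_T of
-  size_t): given a pointer mem returned by malloc, the size word of
-  its chunk sits immediately below it,
-
-      size_t sz = ((size_t *)mem)[-1];
-      int prev_in_use = sz & 0x1;
-      int is_mmapped = sz & 0x2;
-      sz &= ~(size_t)0x3;
-
-  which is exactly what the mem2chunk, prev_inuse, chunk_is_mmapped
-  and chunksize macros below encapsulate.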
-
-*/
-
-/*
-  ---------- Size and alignment checks and conversions ----------
-*/
-
-/* conversion from malloc headers to user pointers, and back */
-
-#define chunk2mem(p) ((Void_t *)((char *)(p) + 2 * SIZE_SZ))
-#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - 2 * SIZE_SZ))
-
-/* The smallest possible chunk */
-#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk))
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-
-#define MINSIZE \
-    (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
-
-/* Check if m has acceptable alignment */
-
-#define aligned_OK(m) (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0)
-
-/*
-  Check if a request is so large that it would wrap around zero when
-  padded and aligned. To simplify some other code, the bound is made
-  low enough so that adding MINSIZE will also not wrap around zero.
-*/
-
-#define REQUEST_OUT_OF_RANGE(req) \
-    ((CHUNK_SIZE_T)(req) >=       \
-     (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE))
-
-/* pad request bytes into a usable size -- internal version */
-
-#define request2size(req) \
-    (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
-
-/* Same, except also perform argument check */
-
-#define checked_request2size(req, sz) \
-    if (REQUEST_OUT_OF_RANGE(req)) {  \
-        MALLOC_FAILURE_ACTION;        \
-        return 0;                     \
-    }                                 \
-    (sz) = request2size(req);
-
-/*
-  --------------- Physical chunk operations ---------------
-*/
-
-/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
-#define PREV_INUSE 0x1
-
-/* extract inuse bit of previous chunk */
-#define prev_inuse(p) ((p)->size & PREV_INUSE)
-
-/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
-#define IS_MMAPPED 0x2
-
-/* check for mmap()'ed chunk */
-#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
-
-/*
-  Bits to mask off when extracting size
-
-  Note: IS_MMAPPED is intentionally not masked off from size field in
-  macros for which mmapped chunks should never be seen. This should
-  cause helpful core dumps to occur if it is tried by accident by
-  people extending or adapting this malloc.
-*/
-#define SIZE_BITS (PREV_INUSE | IS_MMAPPED)
-
-/* Get size, ignoring use bits */
-#define chunksize(p) ((p)->size & ~(SIZE_BITS))
-
-/* Ptr to next physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))
-
-/* Ptr to previous physical malloc_chunk */
-#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_size)))
-
-/* Treat space at ptr + offset as a chunk */
-#define chunk_at_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
-
-/* extract p's inuse bit */
-#define inuse(p) \
-    ((((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)
-
-/* set/clear chunk as being inuse without otherwise disturbing */
-#define set_inuse(p) \
-    ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE
-
-#define clear_inuse(p) \
-    ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)
-
-/* check/set/clear inuse bits in known places */
-#define inuse_bit_at_offset(p, s) \
-    (((mchunkptr)(((char *)(p)) + (s)))->size & PREV_INUSE)
-
-#define set_inuse_bit_at_offset(p, s) \
-    (((mchunkptr)(((char *)(p)) + (s)))->size |= PREV_INUSE)
-
-#define clear_inuse_bit_at_offset(p, s) \
-    (((mchunkptr)(((char *)(p)) + (s)))->size &= ~(PREV_INUSE))
-
-/* Set size at head, without disturbing its use bit */
-#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s)))
-
-/* Set size/use field */
-#define set_head(p, s) ((p)->size = (s))
-
-/* Set size at footer (only when chunk is not in use) */
-#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_size = (s))
-
-/*
-  -------------------- Internal data structures --------------------
-
-  All internal state is held in an instance of malloc_state defined
-  below. There are no other static variables, except in two optional
-  cases:
-  * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
-  * If HAVE_MMAP is true, but mmap doesn't support
-    MAP_ANONYMOUS, a dummy file descriptor for mmap.
-
-  Beware of lots of tricks that minimize the total bookkeeping space
-  requirements. The result is a little over 1K bytes (for 4byte
-  pointers and size_t.)
-*/
-
-/*
-  Bins
-
-  An array of bin headers for free chunks. Each bin is doubly
-  linked. The bins are approximately proportionally (log) spaced.
-  There are a lot of these bins (96). This may look excessive, but
-  works very well in practice. Most bins hold sizes that are
-  unusual as malloc request sizes, but are more usual for fragments
-  and consolidated sets of chunks, which is what these bins hold, so
-  they can be found quickly. All procedures maintain the invariant
-  that no consolidated chunk physically borders another one, so each
-  chunk in a list is known to be preceded and followed by either
-  inuse chunks or the ends of memory.
-
-  Chunks in bins are kept in size order, with ties going to the
-  approximately least recently used chunk. Ordering isn't needed
-  for the small bins, which all contain the same-sized chunks, but
-  facilitates best-fit allocation for larger chunks. These lists
-  are just sequential. Keeping them in order almost never requires
-  enough traversal to warrant using fancier ordered data
-  structures.
-
-  Chunks of the same size are linked with the most
-  recently freed at the front, and allocations are taken from the
-  back. This results in LRU (FIFO) allocation order, which tends
-  to give each chunk an equal opportunity to be consolidated with
-  adjacent freed chunks, resulting in larger free chunks and less
-  fragmentation.
-
-  To simplify use in double-linked lists, each bin header acts
-  as a malloc_chunk. This avoids special-casing for headers.
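-
-  (For instance, walking a bin b from least to most recently freed is
-  plain list traversal with the header itself as the sentinel,
-
-      for (p = last(b); p != b; p = p->bk) { ... }
-
-  just as the debugging code later in this file does.)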
-  But to conserve space and improve locality, we allocate
-  only the fd/bk pointers of bins, and then use repositioning tricks
-  to treat these as the fields of a malloc_chunk*.
-*/
-
-typedef struct malloc_chunk *mbinptr;
-
-/* addressing -- note that bin_at(0) does not exist */
-#define bin_at(m, i) ((mbinptr)((char *)&((m)->bins[(i) << 1]) - (SIZE_SZ << 1)))
-
-/* analog of ++bin */
-#define next_bin(b) ((mbinptr)((char *)(b) + (sizeof(mchunkptr) << 1)))
-
-/* Reminders about list directionality within bins */
-#define first(b) ((b)->fd)
-#define last(b) ((b)->bk)
-
-/* Take a chunk off a bin list */
-#define unlink(P, BK, FD) \
-    {                     \
-        FD = P->fd;       \
-        BK = P->bk;       \
-        FD->bk = BK;      \
-        BK->fd = FD;      \
-    }
-
-/*
-  Indexing
-
-  Bins for sizes < 256 bytes contain chunks of all the same size, spaced
-  8 bytes apart. Larger bins are approximately logarithmically spaced,
-  four bins per power of two, each covering a quarter of that range:
-
-  32 bins of size 8
-   4 bins of size 64
-   4 bins of size 128
-   ...
-   4 bins of size 2097152
-   1 bin of size what's left
-
-  The bins top out around 16MB because we expect to
-  service larger requests via mmap.
-*/
-
-#define NBINS 96
-#define NSMALLBINS 32
-#define SMALLBIN_WIDTH 8
-#define MIN_LARGE_SIZE 256
-
-#define in_smallbin_range(sz) \
-    ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE)
-
-#define smallbin_index(sz) (((unsigned)(sz)) >> 3)
-
-/*
-  Compute index for size. We expect this to be inlined when
-  compiled with optimization, else not, which works out well.
-*/
-static int largebin_index(unsigned int sz) {
-    unsigned int x = sz >> SMALLBIN_WIDTH;
-    unsigned int m; /* bit position of highest set bit of x */
-
-    if (x >= 0x10000) {
-        return NBINS - 1;
-    }
-
-    /* On Intel, use BSRL instruction to find highest bit */
-#if defined(__GNUC__) && defined(i386)
-
-    __asm__("bsrl %1,%0\n\t"
-            : "=r"(m)
-            : "g"(x));
-
-#else
-    {
-        /*
-          Based on branch-free nlz algorithm in chapter 5 of Henry
-          S. Warren Jr's book "Hacker's Delight".
-        */
-
-        unsigned int n = ((x - 0x100) >> 16) & 8;
-        x <<= n;
-        m = ((x - 0x1000) >> 16) & 4;
-        n += m;
-        x <<= m;
-        m = ((x - 0x4000) >> 16) & 2;
-        n += m;
-        x = (x << m) >> 14;
-        m = 13 - n + (x & ~(x >> 1));
-    }
-#endif
-
-    /* Use next 2 bits to create finer-granularity bins */
-    return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3);
-}
-
-#define bin_index(sz) \
-    ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
-
-/*
-  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
-  first bin that is maintained in sorted order. This must
-  be the smallest size corresponding to a given bin.
-
-  Normally, this should be MIN_LARGE_SIZE. But you can weaken
-  best fit guarantees to sometimes speed up malloc by increasing value.
-  Doing this means that malloc may choose a chunk that is
-  non-best-fitting by up to the width of the bin.
-
-  Some useful cutoff values:
-      512 - all bins sorted
-     2560 - leaves bins <= 64 bytes wide unsorted
-    12288 - leaves bins <= 512 bytes wide unsorted
-    65536 - leaves bins <= 4096 bytes wide unsorted
-   262144 - leaves bins <= 32768 bytes wide unsorted
-       -1 - no bins sorted (not recommended!)
-*/
-
-#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
-/* #define FIRST_SORTED_BIN_SIZE 65536 */
-
-/*
-  Unsorted chunks
-
-  All remainders from chunk splits, as well as all returned chunks,
-  are first placed in the "unsorted" bin. They are then placed
-  in regular bins after malloc gives them ONE chance to be used before
-  binning.
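-
-  (For example, if a program frees a chunk and the very next malloc
-  requests exactly that size, the chunk is handed back straight off
-  this list; only a chunk that fails its one chance gets filed into
-  the regular bin for its size.)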
-  So, basically, the unsorted_chunks list acts as a queue,
-  with chunks being placed on it in free (and malloc_consolidate),
-  and taken off (to be either used or placed in bins) in malloc.
-*/
-
-/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
-#define unsorted_chunks(M) (bin_at(M, 1))
-
-/*
-  Top
-
-  The top-most available chunk (i.e., the one bordering the end of
-  available memory) is treated specially. It is never included in
-  any bin, is used only if no other chunk is available, and is
-  released back to the system if it is very large (see
-  M_TRIM_THRESHOLD). Because top initially
-  points to its own bin with initial zero size, thus forcing
-  extension on the first malloc request, we avoid having any special
-  code in malloc to check whether it even exists yet. But we still
-  need to do so when getting memory from system, so we make
-  initial_top treat the bin as a legal but unusable chunk during the
-  interval between initialization and the first call to
-  sYSMALLOc. (This is somewhat delicate, since it relies on
-  the 2 preceding words to be zero during this interval as well.)
-*/
-
-/* Conveniently, the unsorted bin can be used as dummy top on first call */
-#define initial_top(M) (unsorted_chunks(M))
-
-/*
-  Binmap
-
-  To help compensate for the large number of bins, a one-level index
-  structure is used for bin-by-bin searching. `binmap' is a
-  bitvector recording whether bins are definitely empty so they can
-  be skipped over during traversals. The bits are NOT always
-  cleared as soon as bins are empty, but instead only
-  when they are noticed to be empty during traversal in malloc.
-*/
-
-/* Conservatively use 32 bits per map word, even if on 64bit system */
-#define BINMAPSHIFT 5
-#define BITSPERMAP (1U << BINMAPSHIFT)
-#define BINMAPSIZE (NBINS / BITSPERMAP)
-
-#define idx2block(i) ((i) >> BINMAPSHIFT)
-#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
-
-#define mark_bin(m, i) ((m)->binmap[idx2block(i)] |= idx2bit(i))
-#define unmark_bin(m, i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
-#define get_binmap(m, i) ((m)->binmap[idx2block(i)] & idx2bit(i))
-
-/*
-  Fastbins
-
-  An array of lists holding recently freed small chunks. Fastbins
-  are not doubly linked. It is faster to single-link them, and
-  since chunks are never removed from the middles of these lists,
-  double linking is not necessary. Also, unlike regular bins, they
-  are not even processed in FIFO order (they use faster LIFO) since
-  ordering doesn't much matter in the transient contexts in which
-  fastbins are normally used.
-
-  Chunks in fastbins keep their inuse bit set, so they cannot
-  be consolidated with other free chunks. malloc_consolidate
-  releases all chunks in fastbins and consolidates them with
-  other free chunks.
-*/
-
-typedef struct malloc_chunk *mfastbinptr;
-
-/* offset 2 to use otherwise unindexable first 2 bins */
-#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2)
-
-/* The maximum fastbin request size we support */
-#define MAX_FAST_SIZE 80
-
-#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE)) + 1)
-
-/*
-  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
-  that triggers automatic consolidation of possibly-surrounding
-  fastbin chunks. This is a heuristic, so the exact value should not
-  matter too much. It is defined at half the default trim threshold as a
-  compromise heuristic to only attempt consolidation if it is likely
-  to lead to trimming.
-  However, it is not dynamically tunable, since
-  consolidation reduces fragmentation surrounding large chunks even
-  if trimming is not used.
-*/
-
-#define FASTBIN_CONSOLIDATION_THRESHOLD \
-    ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
-
-/*
-  Since the lowest 2 bits in max_fast don't matter in size comparisons,
-  they are used as flags.
-*/
-
-/*
-  ANYCHUNKS_BIT held in max_fast indicates that there may be any
-  freed chunks at all. It is set true when entering a chunk into any
-  bin.
-*/
-
-#define ANYCHUNKS_BIT (1U)
-
-#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT))
-#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT)
-#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT)
-
-/*
-  FASTCHUNKS_BIT held in max_fast indicates that there are probably
-  some fastbin chunks. It is set true on entering a chunk into any
-  fastbin, and cleared only in malloc_consolidate.
-*/
-
-#define FASTCHUNKS_BIT (2U)
-
-#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT))
-#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT | ANYCHUNKS_BIT))
-#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT))
-
-/*
-  Set value of max_fast.
-  Use impossibly small value if 0.
-*/
-
-#define set_max_fast(M, s)                                            \
-    (M)->max_fast = (((s) == 0) ? SMALLBIN_WIDTH : request2size(s)) | \
-                    ((M)->max_fast & (FASTCHUNKS_BIT | ANYCHUNKS_BIT))
-
-#define get_max_fast(M) \
-    ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
-
-/*
-  morecore_properties is a status word holding dynamically discovered
-  or controlled properties of the morecore function
-*/
-
-#define MORECORE_CONTIGUOUS_BIT (1U)
-
-#define contiguous(M) \
-    (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT))
-#define noncontiguous(M) \
-    (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0)
-#define set_contiguous(M) \
-    ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT)
-#define set_noncontiguous(M) \
-    ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)
-
-/*
-  ----------- Internal state representation and initialization -----------
-*/
-
-struct malloc_state {
-
-    /* The maximum chunk size to be eligible for fastbin */
-    INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */
-
-    /* Fastbins */
-    mfastbinptr fastbins[NFASTBINS];
-
-    /* Base of the topmost chunk -- not otherwise kept in a bin */
-    mchunkptr top;
-
-    /* The remainder from the most recent split of a small request */
-    mchunkptr last_remainder;
-
-    /* Normal bins packed as described above */
-    mchunkptr bins[NBINS * 2];
-
-    /* Bitmap of bins. Trailing zero map handles cases of largest binned size */
-    unsigned int binmap[BINMAPSIZE + 1];
-
-    /* Tunable parameters */
-    CHUNK_SIZE_T trim_threshold;
-    INTERNAL_SIZE_T top_pad;
-    INTERNAL_SIZE_T mmap_threshold;
-
-    /* Memory map support */
-    int n_mmaps;
-    int n_mmaps_max;
-    int max_n_mmaps;
-
-    /* Cache malloc_getpagesize */
-    unsigned int pagesize;
-
-    /* Track properties of MORECORE */
-    unsigned int morecore_properties;
-
-    /* Statistics */
-    INTERNAL_SIZE_T mmapped_mem;
-    INTERNAL_SIZE_T sbrked_mem;
-    INTERNAL_SIZE_T max_sbrked_mem;
-    INTERNAL_SIZE_T max_mmapped_mem;
-    INTERNAL_SIZE_T max_total_mem;
-};
-
-typedef struct malloc_state *mstate;
-
-/*
-  There is exactly one instance of this struct in this malloc.
-  If you are adapting this malloc in a way that does NOT use a static
-  malloc_state, you MUST explicitly zero-fill it before using. This
-  malloc relies on the property that malloc_state is initialized to
-  all zeroes (as is true of C statics).
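-
-  A minimal sketch of such an adaptation (my_alloc_state_somehow is a
-  placeholder for however the adapted code obtains the struct):
-
-      struct malloc_state *st = my_alloc_state_somehow();
-      memset(st, 0, sizeof(struct malloc_state));
-
-  The zero-fill must happen before any malloc or free traffic touches
-  the state.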
-*/
-
-static struct malloc_state av_; /* never directly referenced */
-
-/*
-  All uses of av_ are via get_malloc_state().
-  At most one "call" to get_malloc_state is made per invocation of
-  the public versions of malloc and free, but other routines
-  that in turn invoke malloc and/or free may call more than once.
-  Also, it is called in check* routines if DL_DEBUG is set.
-*/
-
-#define get_malloc_state() (&(av_))
-
-/*
-  Initialize a malloc_state struct.
-
-  This is called only from within malloc_consolidate, which needs
-  to be called in the same contexts anyway. It is never called directly
-  outside of malloc_consolidate because some optimizing compilers try
-  to inline it at all call points, which turns out not to be an
-  optimization at all. (Inlining it in malloc_consolidate is fine though.)
-*/
-
-#if __STD_C
-static void malloc_init_state(mstate av)
-#else
-static void malloc_init_state(av) mstate av;
-#endif
-{
-    int i;
-    mbinptr bin;
-
-    /* Establish circular links for normal bins */
-    for (i = 1; i < NBINS; ++i) {
-        bin = bin_at(av, i);
-        bin->fd = bin->bk = bin;
-    }
-
-    av->top_pad = DEFAULT_TOP_PAD;
-    av->n_mmaps_max = DEFAULT_MMAP_MAX;
-    av->mmap_threshold = DEFAULT_MMAP_THRESHOLD;
-    av->trim_threshold = DEFAULT_TRIM_THRESHOLD;
-
-#if MORECORE_CONTIGUOUS
-    set_contiguous(av);
-#else
-    set_noncontiguous(av);
-#endif
-
-    set_max_fast(av, DEFAULT_MXFAST);
-
-    av->top = initial_top(av);
-    av->pagesize = malloc_getpagesize;
-}
-
-/*
-  Other internal utilities operating on mstates
-*/
-
-#if __STD_C
-static Void_t *sYSMALLOc(INTERNAL_SIZE_T, mstate);
-static int sYSTRIm(size_t, mstate);
-static void malloc_consolidate(mstate);
-static Void_t **iALLOc(size_t, size_t *, int, Void_t **);
-#else
-static Void_t *sYSMALLOc();
-static int sYSTRIm();
-static void malloc_consolidate();
-static Void_t **iALLOc();
-#endif
-
-/*
-  Debugging support
-
-  These routines make a number of assertions about the states
-  of data structures that should be true at all times. If any
-  are not true, it's very likely that a user program has somehow
-  trashed memory. (It's also possible that there is a coding error
-  in malloc. In which case, please report it!)
-*/
-
-#if !DL_DEBUG
-
-#define check_chunk(P)
-#define check_free_chunk(P)
-#define check_inuse_chunk(P)
-#define check_remalloced_chunk(P, N)
-#define check_malloced_chunk(P, N)
-#define check_malloc_state()
-
-#else
-#define check_chunk(P) do_check_chunk(P)
-#define check_free_chunk(P) do_check_free_chunk(P)
-#define check_inuse_chunk(P) do_check_inuse_chunk(P)
-#define check_remalloced_chunk(P, N) do_check_remalloced_chunk(P, N)
-#define check_malloced_chunk(P, N) do_check_malloced_chunk(P, N)
-#define check_malloc_state() do_check_malloc_state()
-
-/*
-  Properties of all chunks
-*/
-
-#if __STD_C
-static void do_check_chunk(mchunkptr p)
-#else
-static void do_check_chunk(p) mchunkptr p;
-#endif
-{
-    mstate av = get_malloc_state();
-    CHUNK_SIZE_T sz = chunksize(p);
-    /* min and max possible addresses assuming contiguous allocation */
-    char *max_address = (char *)(av->top) + chunksize(av->top);
-    char *min_address = max_address - av->sbrked_mem;
-
-    if (!chunk_is_mmapped(p)) {
-
-        /* Has legal address ... */
-        if (p != av->top) {
-            if (contiguous(av)) {
-                assert(((char *)p) >= min_address);
-                assert(((char *)p + sz) <= ((char *)(av->top)));
-            }
-        } else {
-            /* top size is always at least MINSIZE */
-            assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
-            /* top predecessor always marked inuse */
-            assert(prev_inuse(p));
-        }
-    } else {
-#if HAVE_MMAP
-        /* address is outside main heap */
-        if (contiguous(av) && av->top != initial_top(av)) {
-            assert(((char *)p) < min_address || ((char *)p) > max_address);
-        }
-        /* chunk is page-aligned */
-        assert(((p->prev_size + sz) & (av->pagesize - 1)) == 0);
-        /* mem is aligned */
-        assert(aligned_OK(chunk2mem(p)));
-#else
-        /* force an appropriate assert violation if debug set */
-        assert(!chunk_is_mmapped(p));
-#endif
-    }
-}
-
-/*
-  Properties of free chunks
-*/
-
-#if __STD_C
-static void do_check_free_chunk(mchunkptr p)
-#else
-static void do_check_free_chunk(p) mchunkptr p;
-#endif
-{
-    mstate av = get_malloc_state();
-
-    INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
-    mchunkptr next = chunk_at_offset(p, sz);
-
-    do_check_chunk(p);
-
-    /* Chunk must claim to be free ... */
-    assert(!inuse(p));
-    assert(!chunk_is_mmapped(p));
-
-    /* Unless a special marker, must have OK fields */
-    if ((CHUNK_SIZE_T)(sz) >= MINSIZE) {
-        assert((sz & MALLOC_ALIGN_MASK) == 0);
-        assert(aligned_OK(chunk2mem(p)));
-        /* ... matching footer field */
-        assert(next->prev_size == sz);
-        /* ... and is fully consolidated */
-        assert(prev_inuse(p));
-        assert(next == av->top || inuse(next));
-
-        /* ... and has minimally sane links */
-        assert(p->fd->bk == p);
-        assert(p->bk->fd == p);
-    } else { /* markers are always of size SIZE_SZ */
-        assert(sz == SIZE_SZ);
-    }
-}
-
-/*
-  Properties of inuse chunks
-*/
-
-#if __STD_C
-static void do_check_inuse_chunk(mchunkptr p)
-#else
-static void do_check_inuse_chunk(p) mchunkptr p;
-#endif
-{
-    mstate av = get_malloc_state();
-    mchunkptr next;
-    do_check_chunk(p);
-
-    if (chunk_is_mmapped(p)) {
-        return; /* mmapped chunks have no next/prev */
-    }
-
-    /* Check whether it claims to be in use ... */
-    assert(inuse(p));
-
-    next = next_chunk(p);
-
-    /*
-      ... and is surrounded by OK chunks.
-      Since more things can be checked with free chunks than inuse ones,
-      if an inuse chunk borders them and debug is on, it's worth doing them.
-    */
-    if (!prev_inuse(p)) {
-        /* Note that we cannot even look at prev unless it is not inuse */
-        mchunkptr prv = prev_chunk(p);
-        assert(next_chunk(prv) == p);
-        do_check_free_chunk(prv);
-    }
-
-    if (next == av->top) {
-        assert(prev_inuse(next));
-        assert(chunksize(next) >= MINSIZE);
-    } else if (!inuse(next)) {
-        do_check_free_chunk(next);
-    }
-}
-
-/*
-  Properties of chunks recycled from fastbins
-*/
-
-#if __STD_C
-static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
-#else
-static void do_check_remalloced_chunk(p, s) mchunkptr p;
-INTERNAL_SIZE_T s;
-#endif
-{
-    INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
-
-    do_check_inuse_chunk(p);
-
-    /* Legal size ... */
-    assert((sz & MALLOC_ALIGN_MASK) == 0);
-    assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
-    /* ... and alignment */
-    assert(aligned_OK(chunk2mem(p)));
-    /* chunk is less than MINSIZE more than request */
-    assert((long)(sz) - (long)(s) >= 0);
-    assert((long)(sz) - (long)(s + MINSIZE) < 0);
-}
-
-/*
-  Properties of nonrecycled chunks at the point they are malloced
-*/
-
-#if __STD_C
-static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
-#else
-static void do_check_malloced_chunk(p, s) mchunkptr p;
-INTERNAL_SIZE_T s;
-#endif
-{
-    /* same as recycled case ... */
-    do_check_remalloced_chunk(p, s);
-
-    /*
-      ... plus, must obey implementation invariant that prev_inuse is
-      always true of any allocated chunk; i.e., that each allocated
-      chunk borders either a previously allocated and still in-use
-      chunk, or the base of its memory arena. This is ensured
-      by making all allocations from the `lowest' part of any found
-      chunk. This does not necessarily hold however for chunks
-      recycled via fastbins.
-    */
-
-    assert(prev_inuse(p));
-}
-
-/*
-  Properties of malloc_state.
-
-  This may be useful for debugging malloc, as well as detecting user
-  programmer errors that somehow write into malloc_state.
-
-  If you are extending or experimenting with this malloc, you can
-  probably figure out how to hack this routine to print out or
-  display chunk addresses, sizes, bins, and other instrumentation.
-*/
-
-static void do_check_malloc_state(void) {
-    mstate av = get_malloc_state();
-    int i;
-    mchunkptr p;
-    mchunkptr q;
-    mbinptr b;
-    unsigned int binbit;
-    int empty;
-    unsigned int idx;
-    INTERNAL_SIZE_T size;
-    CHUNK_SIZE_T total = 0;
-    int max_fast_bin;
-
-    /* internal size_t must be no wider than pointer type */
-    assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char *));
-
-    /* alignment is a power of 2 */
-    assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
-
-    /* cannot run remaining checks until fully initialized */
-    if (av->top == 0 || av->top == initial_top(av)) {
-        return;
-    }
-
-    /* pagesize is a power of 2 */
-    assert((av->pagesize & (av->pagesize - 1)) == 0);
-
-    /* properties of fastbins */
-
-    /* max_fast is in allowed range */
-    assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE));
-
-    max_fast_bin = fastbin_index(av->max_fast);
-
-    for (i = 0; i < NFASTBINS; ++i) {
-        p = av->fastbins[i];
-
-        /* all bins past max_fast are empty */
-        if (i > max_fast_bin) {
-            assert(p == 0);
-        }
-
-        while (p != 0) {
-            /* each chunk claims to be inuse */
-            do_check_inuse_chunk(p);
-            total += chunksize(p);
-            /* chunk belongs in this bin */
-            assert(fastbin_index(chunksize(p)) == i);
-            p = p->fd;
-        }
-    }
-
-    if (total != 0) {
-        assert(have_fastchunks(av));
-    } else if (!have_fastchunks(av)) {
-        assert(total == 0);
-    }
-
-    /* check normal bins */
-    for (i = 1; i < NBINS; ++i) {
-        b = bin_at(av, i);
-
-        /* binmap is accurate (except for bin 1 == unsorted_chunks) */
-        if (i >= 2) {
-            binbit = get_binmap(av, i);
-            empty = last(b) == b;
-            if (!binbit) {
-                assert(empty);
-            } else if (!empty) {
-                assert(binbit);
-            }
-        }
-
-        for (p = last(b); p != b; p = p->bk) {
-            /* each chunk claims to be free */
-            do_check_free_chunk(p);
-            size = chunksize(p);
-            total += size;
-            if (i >= 2) {
-                /* chunk belongs in bin */
-                idx = bin_index(size);
-                assert(idx == i);
-                /* lists are sorted */
-                if ((CHUNK_SIZE_T)size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
-                    assert(p->bk == b ||
-                           (CHUNK_SIZE_T)chunksize(p->bk) >=
-                               (CHUNK_SIZE_T)chunksize(p));
-                }
-            }
-            /* chunk is followed by a legal chain of inuse chunks */
-            for (q = next_chunk(p);
-                 (q != av->top && inuse(q) &&
-                  (CHUNK_SIZE_T)(chunksize(q)) >=
-                      MINSIZE);
-                 q = next_chunk(q)) {
-                do_check_inuse_chunk(q);
-            }
-        }
-    }
-
-    /* top chunk is OK */
-    check_chunk(av->top);
-
-    /* sanity checks for statistics */
-
-    assert(total <= (CHUNK_SIZE_T)(av->max_total_mem));
-    assert(av->n_mmaps >= 0);
-    assert(av->n_mmaps <= av->max_n_mmaps);
-
-    assert((CHUNK_SIZE_T)(av->sbrked_mem) <=
-           (CHUNK_SIZE_T)(av->max_sbrked_mem));
-
-    assert((CHUNK_SIZE_T)(av->mmapped_mem) <=
-           (CHUNK_SIZE_T)(av->max_mmapped_mem));
-
-    assert((CHUNK_SIZE_T)(av->max_total_mem) >=
-           (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem));
-}
-#endif
-
-/* ----------- Routines dealing with system allocation -------------- */
-
-/*
-  sysmalloc handles malloc cases requiring more memory from the system.
-  On entry, it is assumed that av->top does not have enough
-  space to service request for nb bytes, thus requiring that av->top
-  be extended or replaced.
-*/
-
-#if __STD_C
-static Void_t *sYSMALLOc(INTERNAL_SIZE_T nb, mstate av)
-#else
-static Void_t *sYSMALLOc(nb, av)
-INTERNAL_SIZE_T nb;
-mstate av;
-#endif
-{
-    mchunkptr old_top;        /* incoming value of av->top */
-    INTERNAL_SIZE_T old_size; /* its size */
-    char *old_end;            /* its end address */
-
-    long size; /* arg to first MORECORE or mmap call */
-    char *brk; /* return value from MORECORE */
-
-    long correction; /* arg to 2nd MORECORE call */
-    char *snd_brk;   /* 2nd return val */
-
-    INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
-    INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
-    char *aligned_brk;              /* aligned offset into brk */
-
-    mchunkptr p;                 /* the allocated/returned chunk */
-    mchunkptr remainder;         /* remainder from allocation */
-    CHUNK_SIZE_T remainder_size; /* its size */
-
-    CHUNK_SIZE_T sum; /* for updating stats */
-
-    size_t pagemask = av->pagesize - 1;
-
-    /*
-      If there is space available in fastbins, consolidate and retry
-      malloc from scratch rather than getting memory from system. This
-      can occur only if nb is in smallbin range so we didn't consolidate
-      upon entry to malloc. It is much easier to handle this case here
-      than in malloc proper.
-    */
-
-    if (have_fastchunks(av)) {
-        assert(in_smallbin_range(nb));
-        malloc_consolidate(av);
-        return mALLOc(nb - MALLOC_ALIGN_MASK);
-    }
-
-#if HAVE_MMAP
-
-    /*
-      If have mmap, and the request size meets the mmap threshold, and
-      the system supports mmap, and there are few enough currently
-      allocated mmapped regions, try to directly map this request
-      rather than expanding top.
-    */
-
-    if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) &&
-        (av->n_mmaps < av->n_mmaps_max)) {
-
-        char *mm; /* return value from mmap call */
-
-        /*
-          Round up size to nearest page. For mmapped chunks, the overhead
-          is one SIZE_SZ unit larger than for normal chunks, because there
-          is no following chunk whose prev_size field could be used.
-        */
-        size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
-
-        /* Don't try if size wraps around 0 */
-        if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
-
-            mm = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-
-            if (mm != (char *)(MORECORE_FAILURE)) {
-
-                /*
-                  The offset to the start of the mmapped region is stored
-                  in the prev_size field of the chunk. This allows us to adjust
-                  returned start address to meet alignment requirements here
-                  and in memalign(), and still be able to compute proper
-                  address argument for later munmap in free() and realloc().
-                */
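-                /*
-                  (Worked example, assuming MALLOC_ALIGNMENT == 8: if the
-                  region started at 0x20000004, chunk2mem would land 4 bytes
-                  off alignment, so front_misalign = 4 and correction = 4;
-                  the chunk then begins at 0x20000008 with prev_size = 4
-                  recording the offset, letting free() recover 0x20000004
-                  to pass to munmap.)
-                */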
-
-                front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
-                if (front_misalign > 0) {
-                    correction = MALLOC_ALIGNMENT - front_misalign;
-                    p = (mchunkptr)(mm + correction);
-                    p->prev_size = correction;
-                    set_head(p, (size - correction) | IS_MMAPPED);
-                } else {
-                    p = (mchunkptr)mm;
-                    p->prev_size = 0;
-                    set_head(p, size | IS_MMAPPED);
-                }
-
-                /* update statistics */
-
-                if (++av->n_mmaps > av->max_n_mmaps) {
-                    av->max_n_mmaps = av->n_mmaps;
-                }
-
-                sum = av->mmapped_mem += size;
-                if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) {
-                    av->max_mmapped_mem = sum;
-                }
-                sum += av->sbrked_mem;
-                if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) {
-                    av->max_total_mem = sum;
-                }
-
-                check_chunk(p);
-
-                return chunk2mem(p);
-            }
-        }
-    }
-#endif
-
-    /* Record incoming configuration of top */
-
-    old_top = av->top;
-    old_size = chunksize(old_top);
-    old_end = (char *)(chunk_at_offset(old_top, old_size));
-
-    brk = snd_brk = (char *)(MORECORE_FAILURE);
-
-    /*
-      If not the first time through, we require old_size to be
-      at least MINSIZE and to have prev_inuse set.
-    */
-
-    assert((old_top == initial_top(av) && old_size == 0) ||
-           ((CHUNK_SIZE_T)(old_size) >= MINSIZE &&
-            prev_inuse(old_top)));
-
-    /* Precondition: not enough current space to satisfy nb request */
-    assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE));
-
-    /* Precondition: all fastbins are consolidated */
-    assert(!have_fastchunks(av));
-
-    /* Request enough space for nb + pad + overhead */
-
-    size = nb + av->top_pad + MINSIZE;
-
-    /*
-      If contiguous, we can subtract out existing space that we hope to
-      combine with new space. We add it back later only if
-      we don't actually get contiguous space.
-    */
-
-    if (contiguous(av)) {
-        size -= old_size;
-    }
-
-    /*
-      Round to a multiple of page size.
-      If MORECORE is not contiguous, this ensures that we only call it
-      with whole-page arguments. And if MORECORE is contiguous and
-      this is not first time through, this preserves page-alignment of
-      previous calls. Otherwise, we correct to page-align below.
-    */
-
-    size = (size + pagemask) & ~pagemask;
-
-    /*
-      Don't try to call MORECORE if argument is so big as to appear
-      negative. Note that since mmap takes size_t arg, it may succeed
-      below even if we cannot call MORECORE.
-    */
-
-    if (size > 0) {
-        brk = (char *)(MORECORE(size));
-    }
-
-    /*
-      If have mmap, try using it as a backup when MORECORE fails or
-      cannot be used. This is worth doing on systems that have "holes" in
-      address space, so sbrk cannot extend to give contiguous space, but
-      space is available elsewhere. Note that we ignore mmap max count
-      and threshold limits, since the space will not be used as a
-      segregated mmap region.
-    */
-
-#if HAVE_MMAP
-    if (brk == (char *)(MORECORE_FAILURE)) {
-
-        /* Cannot merge with old top, so add its size back in */
-        if (contiguous(av)) {
-            size = (size + old_size + pagemask) & ~pagemask;
-        }
-
-        /* If we are relying on mmap as backup, then use larger units */
-        if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE)) {
-            size = MMAP_AS_MORECORE_SIZE;
-        }
-
-        /* Don't try if size wraps around 0 */
-        if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
-
-            brk = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-
-            if (brk != (char *)(MORECORE_FAILURE)) {
-
-                /* We do not need, and cannot use, another sbrk call to find end */
-                snd_brk = brk + size;
-
-                /*
-                  Record that we no longer have a contiguous sbrk region.
-                  After the first time mmap is used as backup, we do not
-                  ever rely on contiguous space since this could incorrectly
-                  bridge regions.
-                */
-                set_noncontiguous(av);
-            }
-        }
-    }
-#endif
-
-    if (brk != (char *)(MORECORE_FAILURE)) {
-        av->sbrked_mem += size;
-
-        /*
-          If MORECORE extends previous space, we can likewise extend top size.
-        */
-
-        if (brk == old_end && snd_brk == (char *)(MORECORE_FAILURE)) {
-            set_head(old_top, (size + old_size) | PREV_INUSE);
-        }
-
-        /*
-          Otherwise, make adjustments:
-
-          * If the first time through or noncontiguous, we need to call sbrk
-            just to find out where the end of memory lies.
-
-          * We need to ensure that all returned chunks from malloc will meet
-            MALLOC_ALIGNMENT
-
-          * If there was an intervening foreign sbrk, we need to adjust sbrk
-            request size to account for fact that we will not be able to
-            combine new space with existing space in old_top.
-
-          * Almost all systems internally allocate whole pages at a time, in
-            which case we might as well use the whole last page of request.
-            So we allocate enough more memory to hit a page boundary now,
-            which in turn causes future contiguous calls to page-align.
-        */
-
-        else {
-            front_misalign = 0;
-            end_misalign = 0;
-            correction = 0;
-            aligned_brk = brk;
-
-            /*
-              If MORECORE returns an address lower than we have seen before,
-              we know it isn't really contiguous. This and some subsequent
-              checks help cope with non-conforming MORECORE functions and
-              the presence of "foreign" calls to MORECORE from outside of
-              malloc or by other threads. We cannot guarantee to detect
-              these in all cases, but cope with the ones we do detect.
-            */
-            if (contiguous(av) && old_size != 0 && brk < old_end) {
-                set_noncontiguous(av);
-            }
-
-            /* handle contiguous cases */
-            if (contiguous(av)) {
-
-                /*
-                  We can tolerate forward non-contiguities here (usually due
-                  to foreign calls) but treat them as part of our space for
-                  stats reporting.
-                */
-                if (old_size != 0) {
-                    av->sbrked_mem += brk - old_end;
-                }
-
-                /* Guarantee alignment of first new chunk made from this space */
-
-                front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
-                if (front_misalign > 0) {
-
-                    /*
-                      Skip over some bytes to arrive at an aligned position.
-                      We don't need to specially mark these wasted front bytes.
-                      They will never be accessed anyway because
-                      prev_inuse of av->top (and any chunk created from its start)
-                      is always true after initialization.
-                    */
-
-                    correction = MALLOC_ALIGNMENT - front_misalign;
-                    aligned_brk += correction;
-                }
-
-                /*
-                  If this isn't adjacent to existing space, then we will not
-                  be able to merge with old_top space, so must add to 2nd request.
-                */
-
-                correction += old_size;
-
-                /* Extend the end address to hit a page boundary */
-                end_misalign = (INTERNAL_SIZE_T)(brk + size + correction);
-                correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign;
-
-                assert(correction >= 0);
-                snd_brk = (char *)(MORECORE(correction));
-
-                if (snd_brk == (char *)(MORECORE_FAILURE)) {
-                    /*
-                      If can't allocate correction, try to at least find out current
-                      brk. It might be enough to proceed without failing.
-                    */
-                    correction = 0;
-                    snd_brk = (char *)(MORECORE(0));
-                } else if (snd_brk < brk) {
-                    /*
-                      If the second call gives noncontiguous space even though
-                      it says it won't, the only course of action is to ignore
-                      results of second call, and conservatively estimate where
-                      the first call left us. Also set noncontiguous, so this
-                      won't happen again, leaving at most one hole.
-
-                      Note that this check is intrinsically incomplete. Because
-                      MORECORE is allowed to give more space than we ask for,
-                      there is no reliable way to detect a noncontiguity
-                      producing a forward gap for the second call.
-                    */
-                    snd_brk = brk + size;
-                    correction = 0;
-                    set_noncontiguous(av);
-                }
-            }
-
-            /* handle non-contiguous cases */
-            else {
-                /* MORECORE/mmap must correctly align */
-                assert(aligned_OK(chunk2mem(brk)));
-
-                /* Find out current end of memory */
-                if (snd_brk == (char *)(MORECORE_FAILURE)) {
-                    snd_brk = (char *)(MORECORE(0));
-                    av->sbrked_mem += snd_brk - brk - size;
-                }
-            }
-
-            /* Adjust top based on results of second sbrk */
-            if (snd_brk != (char *)(MORECORE_FAILURE)) {
-                av->top = (mchunkptr)aligned_brk;
-                set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
-                av->sbrked_mem += correction;
-
-                /*
-                  If not the first time through, we either have a
-                  gap due to foreign sbrk or a non-contiguous region. Insert a
-                  double fencepost at old_top to prevent consolidation with space
-                  we don't own. These fenceposts are artificial chunks that are
-                  marked as inuse and are in any case too small to use. We need
-                  two to make sizes and alignments work out.
-                */
-
-                if (old_size != 0) {
-                    /*
-                      Shrink old_top to insert fenceposts, keeping size a
-                      multiple of MALLOC_ALIGNMENT. We know there is at least
-                      enough space in old_top to do this.
-                    */
-                    old_size = (old_size - 3 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
-                    set_head(old_top, old_size | PREV_INUSE);
-
-                    /*
-                      Note that the following assignments completely overwrite
-                      old_top when old_size was previously MINSIZE. This is
-                      intentional. We need the fencepost, even if old_top otherwise gets
-                      lost.
-                    */
-                    chunk_at_offset(old_top, old_size)->size =
-                        SIZE_SZ | PREV_INUSE;
-
-                    chunk_at_offset(old_top, old_size + SIZE_SZ)->size =
-                        SIZE_SZ | PREV_INUSE;
-
-                    /*
-                      If possible, release the rest, suppressing trimming.
-                    */
-                    if (old_size >= MINSIZE) {
-                        INTERNAL_SIZE_T tt = av->trim_threshold;
-                        av->trim_threshold = (INTERNAL_SIZE_T)(-1);
-                        fREe(chunk2mem(old_top));
-                        av->trim_threshold = tt;
-                    }
-                }
-            }
-        }
-
-        /* Update statistics */
-        sum = av->sbrked_mem;
-        if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem)) {
-            av->max_sbrked_mem = sum;
-        }
-
-        sum += av->mmapped_mem;
-        if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) {
-            av->max_total_mem = sum;
-        }
-
-        check_malloc_state();
-
-        /* finally, do the allocation */
-
-        p = av->top;
-        size = chunksize(p);
-
-        /* check that one of the above allocation paths succeeded */
-        if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
-            remainder_size = size - nb;
-            remainder = chunk_at_offset(p, nb);
-            av->top = remainder;
-            set_head(p, nb | PREV_INUSE);
-            set_head(remainder, remainder_size | PREV_INUSE);
-            check_malloced_chunk(p, nb);
-            return chunk2mem(p);
-        }
-    }
-
-    /* catch all failure paths */
-    MALLOC_FAILURE_ACTION;
-    return 0;
-}
-
-/*
-  sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back
-  to the system (via negative arguments to sbrk) if there is unused
-  memory at the `high' end of the malloc pool. It is called
-  automatically by free() when top space exceeds the trim
-  threshold. It is also called by the public malloc_trim routine. It
-  returns 1 if it actually released any memory, else 0.
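-
-  (Worked example of the computation below: with 4K pages, pad == 0,
-  and a 1MB top chunk,
-
-      extra = ((1048576 - 0 - 16 + 4095) / 4096 - 1) * 4096
-            = 255 * 4096
-
-  so 255 of the 256 pages are offered back to the system, and one page
-  plus the MINSIZE sliver is kept.)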
-*/
-
-#if __STD_C
-static int sYSTRIm(size_t pad, mstate av)
-#else
-static int sYSTRIm(pad, av)
-size_t pad;
-mstate av;
-#endif
-{
-    long top_size;     /* Amount of top-most memory */
-    long extra;        /* Amount to release */
-    long released;     /* Amount actually released */
-    char *current_brk; /* address returned by pre-check sbrk call */
-    char *new_brk;     /* address returned by post-check sbrk call */
-    size_t pagesz;
-
-    pagesz = av->pagesize;
-    top_size = chunksize(av->top);
-
-    /* Release in pagesize units, keeping at least one page */
-    extra = ((top_size - pad - MINSIZE + (pagesz - 1)) / pagesz - 1) * pagesz;
-
-    if (extra > 0) {
-
-        /*
-          Only proceed if end of memory is where we last set it.
-          This avoids problems if there were foreign sbrk calls.
-        */
-        current_brk = (char *)(MORECORE(0));
-        if (current_brk == (char *)(av->top) + top_size) {
-
-            /*
-              Attempt to release memory. We ignore MORECORE return value,
-              and instead call again to find out where new end of memory is.
-              This avoids problems if first call releases less than we asked,
-              or if failure somehow altered brk value. (We could still
-              encounter problems if it altered brk in some very bad way,
-              but the only thing we can do is adjust anyway, which will cause
-              some downstream failure.)
-            */
-
-            MORECORE(-extra);
-            new_brk = (char *)(MORECORE(0));
-
-            if (new_brk != (char *)MORECORE_FAILURE) {
-                released = (long)(current_brk - new_brk);
-
-                if (released != 0) {
-                    /* Success. Adjust top. */
-                    av->sbrked_mem -= released;
-                    set_head(av->top, (top_size - released) | PREV_INUSE);
-                    check_malloc_state();
-                    return 1;
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-/*
-  ------------------------------ malloc ------------------------------
-*/
-
-#if __STD_C
-Void_t *mALLOc(size_t bytes)
-#else
-Void_t *mALLOc(bytes)
-size_t bytes;
-#endif
-{
-    mstate av = get_malloc_state();
-
-    INTERNAL_SIZE_T nb; /* normalized request size */
-    unsigned int idx;   /* associated bin index */
-    mbinptr bin;        /* associated bin */
-    mfastbinptr *fb;    /* associated fastbin */
-
-    mchunkptr victim;     /* inspected/selected chunk */
-    INTERNAL_SIZE_T size; /* its size */
-    int victim_index;     /* its bin index */
-
-    mchunkptr remainder;         /* remainder from a split */
-    CHUNK_SIZE_T remainder_size; /* its size */
-
-    unsigned int block; /* bit map traverser */
-    unsigned int bit;   /* bit map traverser */
-    unsigned int map;   /* current word of binmap */
-
-    mchunkptr fwd; /* misc temp for linking */
-    mchunkptr bck; /* misc temp for linking */
-
-    /*
-      Convert request size to internal form by adding SIZE_SZ bytes
-      overhead plus possibly more to obtain necessary alignment and/or
-      to obtain a size of at least MINSIZE, the smallest allocatable
-      size. Also, checked_request2size traps (returning 0) request sizes
-      that are so large that they wrap around zero when padded and
-      aligned.
-    */
-
-    checked_request2size(bytes, nb);
-
-    /*
-      Bypass search if no frees yet
-    */
-    if (!have_anychunks(av)) {
-        if (av->max_fast == 0) { /* initialization check */
-            malloc_consolidate(av);
-        }
-        goto use_top;
-    }
-
-    /*
-      If the size qualifies as a fastbin, first check corresponding bin.
-    */
-
-    if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) {
-        fb = &(av->fastbins[(fastbin_index(nb))]);
-        if ((victim = *fb) != 0) {
-            *fb = victim->fd;
-            check_remalloced_chunk(victim, nb);
-            return chunk2mem(victim);
-        }
-    }
-
-    /*
-      If a small request, check regular bin. Since these "smallbins"
-      hold one size each, no searching within bins is necessary.
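-
-      (Worked example, assuming 4-byte size_t and 8-byte alignment: a
-      request for 20 bytes normalizes to nb = request2size(20) = 24, and
-      smallbin_index(24) = 24 >> 3 = 3, so exactly one bin -- bin 3 --
-      needs to be examined.)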
-      (For a large request, we need to wait until unsorted chunks are
-      processed to find best fit. But for small ones, fits are exact
-      anyway, so we can check now, which is faster.)
-    */
-
-    if (in_smallbin_range(nb)) {
-        idx = smallbin_index(nb);
-        bin = bin_at(av, idx);
-
-        if ((victim = last(bin)) != bin) {
-            bck = victim->bk;
-            set_inuse_bit_at_offset(victim, nb);
-            bin->bk = bck;
-            bck->fd = bin;
-
-            check_malloced_chunk(victim, nb);
-            return chunk2mem(victim);
-        }
-    }
-
-    /*
-      If this is a large request, consolidate fastbins before continuing.
-      While it might look excessive to kill all fastbins before
-      even seeing if there is space available, this avoids
-      fragmentation problems normally associated with fastbins.
-      Also, in practice, programs tend to have runs of either small or
-      large requests, but less often mixtures, so consolidation is not
-      invoked all that often in most programs. And the programs that
-      it is called frequently in otherwise tend to fragment.
-    */
-
-    else {
-        idx = largebin_index(nb);
-        if (have_fastchunks(av)) {
-            malloc_consolidate(av);
-        }
-    }
-
-    /*
-      Process recently freed or remaindered chunks, taking one only if
-      it is an exact fit, or, if this is a small request, the chunk is a
-      remainder from the most recent non-exact fit. Place other traversed
-      chunks in bins. Note that this step is the only place in any routine
-      where chunks are placed in bins.
-    */
-
-    while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
-        bck = victim->bk;
-        size = chunksize(victim);
-
-        /*
-          If a small request, try to use last remainder if it is the
-          only chunk in unsorted bin. This helps promote locality for
-          runs of consecutive small requests. This is the only
-          exception to best-fit, and applies only when there is
-          no exact fit for a small chunk.
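-
-          (For instance, a run of identical small requests carving
-          pieces off one large remainder keeps hitting this case: each
-          split returns a chunk adjacent to the previous one and leaves
-          a smaller last_remainder, so consecutive allocations end up
-          contiguous in memory.)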
- */ - - if (in_smallbin_range(nb) && - bck == unsorted_chunks(av) && - victim == av->last_remainder && - (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { - - /* split and reattach remainder */ - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - av->last_remainder = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* remove from unsorted list */ - unsorted_chunks(av)->bk = bck; - bck->fd = unsorted_chunks(av); - - /* Take now instead of binning if exact fit */ - - if (size == nb) { - set_inuse_bit_at_offset(victim, size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* place chunk in bin */ - - if (in_smallbin_range(size)) { - victim_index = smallbin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - } else { - victim_index = largebin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - - if (fwd != bck) { - /* if smaller than smallest, place first */ - if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) { - fwd = bck; - bck = bck->bk; - } else if ((CHUNK_SIZE_T)(size) >= - (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { - - /* maintain large bins in sorted order */ - size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ - while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) { - fwd = fwd->fd; - } - bck = fwd->bk; - } - } - } - - mark_bin(av, victim_index); - victim->bk = bck; - victim->fd = fwd; - fwd->bk = victim; - bck->fd = victim; - } - - /* - If a large request, scan through the chunks of current bin to - find one that fits. (This will be the smallest that fits unless - FIRST_SORTED_BIN_SIZE has been changed from default.) This is - the only step where an unbounded number of chunks might be - scanned without doing anything useful with them. However the - lists tend to be short. - */ - - if (!in_smallbin_range(nb)) { - bin = bin_at(av, idx); - - for (victim = last(bin); victim != bin; victim = victim->bk) { - size = chunksize(victim); - - if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) { - remainder_size = size - nb; - unlink(victim, bck, fwd); - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - } - } - } - - /* - Search for a chunk by scanning bins, starting with next largest - bin. This search is strictly by best-fit; i.e., the smallest - (with ties going to approximately the least recently used) chunk - that fits is selected. - - The bitmap avoids needing to check that most blocks are nonempty. - */ - - ++idx; - bin = bin_at(av, idx); - block = idx2block(idx); - map = av->binmap[block]; - bit = idx2bit(idx); - - for (;;) { - - /* Skip rest of block if there are no more set bits in this block. 
*/ - if (bit > map || bit == 0) { - do { - if (++block >= BINMAPSIZE) { /* out of bins */ - goto use_top; - } - } while ((map = av->binmap[block]) == 0); - - bin = bin_at(av, (block << BINMAPSHIFT)); - bit = 1; - } - - /* Advance to bin with set bit. There must be one. */ - while ((bit & map) == 0) { - bin = next_bin(bin); - bit <<= 1; - assert(bit != 0); - } - - /* Inspect the bin. It is likely to be non-empty */ - victim = last(bin); - - /* If a false alarm (empty bin), clear the bit. */ - if (victim == bin) { - av->binmap[block] = map &= ~bit; /* Write through */ - bin = next_bin(bin); - bit <<= 1; - } - - else { - size = chunksize(victim); - - /* We know the first chunk in this bin is big enough to use. */ - assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)); - - remainder_size = size - nb; - - /* unlink */ - bck = victim->bk; - bin->bk = bck; - bck->fd = bin; - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - /* advertise as last remainder */ - if (in_smallbin_range(nb)) { - av->last_remainder = remainder; - } - - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - } - } - -use_top: - /* - If large enough, split off the chunk bordering the end of memory - (held in av->top). Note that this is in accord with the best-fit - search rule. In effect, av->top is treated as larger (and thus - less well fitting) than any other available chunk since it can - be extended to be as large as necessary (up to system - limitations). - - We require that av->top always exists (i.e., has size >= - MINSIZE) after initialization, so if it would otherwise be - exhausted by current request, it is replenished. (The main - reason for ensuring it exists is that we may need MINSIZE space - to put in fenceposts in sysmalloc.) - */ - - victim = av->top; - size = chunksize(victim); - - if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - av->top = remainder; - set_head(victim, nb | PREV_INUSE); - set_head(remainder, remainder_size | PREV_INUSE); - - check_malloced_chunk(victim, nb); - return chunk2mem(victim); - } - - /* - If no space in top, relay to handle system-dependent cases - */ - return sYSMALLOc(nb, av); -} - -/* - ------------------------------ free ------------------------------ -*/ - -#if __STD_C -void fREe(Void_t *mem) -#else -void fREe(mem) Void_t *mem; -#endif -{ - mstate av = get_malloc_state(); - - mchunkptr p; /* chunk corresponding to mem */ - INTERNAL_SIZE_T size; /* its size */ - mfastbinptr *fb; /* associated fastbin */ - mchunkptr nextchunk; /* next contiguous chunk */ - INTERNAL_SIZE_T nextsize; /* its size */ - int nextinuse; /* true if nextchunk is used */ - INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - /* free(0) has no effect */ - if (mem != 0) { - p = mem2chunk(mem); - size = chunksize(p); - - check_inuse_chunk(p); - - /* - If eligible, place chunk on a fastbin so it can be found - and used quickly in malloc.
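The binmap walk in the scan above skips a whole word of bins at a time (idx2block/idx2bit), which is what keeps searching mostly-empty bins cheap. A small self-contained model of the same scan, assuming 32 bins per map word; the names here are illustrative, not dlmalloc's macros:

#include <assert.h>

#define NBINS_DEMO 128
#define BITS_PER_WORD 32

/* One bit per bin: set means "this bin may be non-empty". */
static unsigned binmap_demo[NBINS_DEMO / BITS_PER_WORD];

static void mark_bin_demo(int idx) {
    binmap_demo[idx / BITS_PER_WORD] |= 1u << (idx % BITS_PER_WORD);
}

/* Find the first marked bin at or after idx, or -1 if none remain. */
static int next_marked_bin(int idx) {
    int block = idx / BITS_PER_WORD;
    unsigned bit = 1u << (idx % BITS_PER_WORD);
    unsigned map = binmap_demo[block];

    /* Skip whole words with no set bits at or above `bit`. */
    while (bit > map || bit == 0) {
        if (++block >= (int)(NBINS_DEMO / BITS_PER_WORD)) {
            return -1; /* out of bins */
        }
        map = binmap_demo[block];
        bit = 1;
        idx = block * BITS_PER_WORD;
    }
    /* A set bit at or above `bit` now exists in this word; walk to it. */
    while ((bit & map) == 0) {
        bit <<= 1;
        ++idx;
    }
    return idx;
}

int main(void) {
    mark_bin_demo(70);
    assert(next_marked_bin(3) == 70);  /* two empty words skipped in O(1) each */
    assert(next_marked_bin(70) == 70);
    assert(next_marked_bin(71) == -1);
    return 0;
}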
- */ - - if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast) - -#if TRIM_FASTBINS - /* - If TRIM_FASTBINS set, don't place chunks - bordering top into fastbins - */ - && (chunk_at_offset(p, size) != av->top) -#endif - ) { - - set_fastchunks(av); - fb = &(av->fastbins[fastbin_index(size)]); - p->fd = *fb; - *fb = p; - } - - /* - Consolidate other non-mmapped chunks as they arrive. - */ - - else if (!chunk_is_mmapped(p)) { - set_anychunks(av); - - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - - /* consolidate backward */ - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long)prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - /* get and clear inuse bit */ - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - set_head(nextchunk, nextsize); - - /* consolidate forward */ - if (!nextinuse) { - unlink(nextchunk, bck, fwd); - size += nextsize; - } - - /* - Place the chunk in unsorted chunk list. Chunks are - not placed into regular bins until after they have - been given one chance to be used in malloc. - */ - - bck = unsorted_chunks(av); - fwd = bck->fd; - p->bk = bck; - p->fd = fwd; - bck->fd = p; - fwd->bk = p; - - set_head(p, size | PREV_INUSE); - set_foot(p, size); - - check_free_chunk(p); - } - - /* - If the chunk borders the current high end of memory, - consolidate into top - */ - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - check_chunk(p); - } - - /* - If freeing a large space, consolidate possibly-surrounding - chunks. Then, if the total unused topmost memory exceeds trim - threshold, ask malloc_trim to reduce top. - - Unless max_fast is 0, we don't know if there are fastbins - bordering top, so we cannot tell for sure whether threshold - has been reached unless fastbins are consolidated. But we - don't want to consolidate on each free. As a compromise, - consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD - is reached. - */ - - if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { - if (have_fastchunks(av)) { - malloc_consolidate(av); - } - -#ifndef MORECORE_CANNOT_TRIM - if ((CHUNK_SIZE_T)(chunksize(av->top)) >= - (CHUNK_SIZE_T)(av->trim_threshold)) { - sYSTRIm(av->top_pad, av); - } -#endif - } - } - /* - If the chunk was allocated via mmap, release via munmap() - Note that if HAVE_MMAP is false but chunk_is_mmapped is - true, then user must have overwritten memory. There's nothing - we can do to catch this error unless DL_DEBUG is set, in which case - check_inuse_chunk (above) will have triggered error. - */ - - else { -#if HAVE_MMAP - INTERNAL_SIZE_T offset = p->prev_size; - av->n_mmaps--; - av->mmapped_mem -= (size + offset); - munmap((char *)p - offset, size + offset); -#endif - } - } -} - -/* - ------------------------- malloc_consolidate ------------------------- - - malloc_consolidate is a specialized version of free() that tears - down chunks held in fastbins. Free itself cannot be used for this - purpose since, among other things, it might place chunks back onto - fastbins. So, instead, we need to use a minor variant of the same - code. - - Also, because this routine needs to be called the first time through - malloc anyway, it turns out to be the perfect place to trigger - initialization code. 
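The consolidation in free() above depends on boundary tags: chunk sizes are multiples of the alignment, so the low bit of the size word is free to record whether the previous contiguous chunk is in use (PREV_INUSE), and a free chunk stores its size again as a footer (set_foot) in the next chunk's prev_size field so coalescing can walk backwards. A minimal sketch of that encoding, with stand-in names and a toy arena rather than dlmalloc's real macros:

#include <assert.h>
#include <stddef.h>

#define PREV_INUSE_DEMO ((size_t)0x1)

/* Stand-in chunk header. prev_size is meaningful only while the
   previous contiguous chunk is free: it is that chunk's footer. */
struct hdr {
    size_t prev_size;
    size_t size; /* real size with flag bits packed into the low bits */
};

static size_t chunksize_demo(const struct hdr *p) {
    return p->size & ~PREV_INUSE_DEMO;
}

static int prev_inuse_demo(const struct hdr *p) {
    return (p->size & PREV_INUSE_DEMO) != 0;
}

/* The next contiguous chunk starts exactly chunksize() bytes ahead. */
static struct hdr *next_chunk_demo(struct hdr *p) {
    return (struct hdr *)((char *)p + chunksize_demo(p));
}

/* Reaching the previous chunk is only possible through its footer. */
static struct hdr *prev_chunk_demo(struct hdr *p) {
    return (struct hdr *)((char *)p - p->prev_size);
}

int main(void) {
    static size_t arena[12]; /* toy heap, suitably aligned */
    struct hdr *a = (struct hdr *)arena;
    a->size = 32 | PREV_INUSE_DEMO; /* 32-byte chunk; its own predecessor is in use */
    struct hdr *b = next_chunk_demo(a);
    b->prev_size = 32; /* footer that set_foot() would write when a is freed */
    b->size = 48;      /* low bit clear: records that a is free */
    assert(chunksize_demo(a) == 32);
    assert(!prev_inuse_demo(b));     /* so freeing b would coalesce backward */
    assert(prev_chunk_demo(b) == a);
    return 0;
}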
-*/ - -#if __STD_C -static void malloc_consolidate(mstate av) -#else -static void malloc_consolidate(av) mstate av; -#endif -{ - mfastbinptr *fb; /* current fastbin being consolidated */ - mfastbinptr *maxfb; /* last fastbin (for loop control) */ - mchunkptr p; /* current chunk being consolidated */ - mchunkptr nextp; /* next chunk to consolidate */ - mchunkptr unsorted_bin; /* bin header */ - mchunkptr first_unsorted; /* chunk to link to */ - - /* These have same use as in free() */ - mchunkptr nextchunk; - INTERNAL_SIZE_T size; - INTERNAL_SIZE_T nextsize; - INTERNAL_SIZE_T prevsize; - int nextinuse; - mchunkptr bck; - mchunkptr fwd; - - /* - If max_fast is 0, we know that av hasn't - yet been initialized, in which case do so below - */ - - if (av->max_fast != 0) { - clear_fastchunks(av); - - unsorted_bin = unsorted_chunks(av); - - /* - Remove each chunk from fast bin and consolidate it, placing it - then in unsorted bin. Among other reasons for doing this, - placing in unsorted bin avoids needing to calculate actual bins - until malloc is sure that chunks aren't immediately going to be - reused anyway. - */ - - maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); - fb = &(av->fastbins[0]); - do { - if ((p = *fb) != 0) { - *fb = 0; - - do { - check_inuse_chunk(p); - nextp = p->fd; - - /* Slightly streamlined version of consolidation code in free() */ - size = p->size & ~PREV_INUSE; - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long)prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - set_head(nextchunk, nextsize); - - if (!nextinuse) { - size += nextsize; - unlink(nextchunk, bck, fwd); - } - - first_unsorted = unsorted_bin->fd; - unsorted_bin->fd = p; - first_unsorted->bk = p; - - set_head(p, size | PREV_INUSE); - p->bk = unsorted_bin; - p->fd = first_unsorted; - set_foot(p, size); - } - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - } - - } while ((p = nextp) != 0); - } - } while (fb++ != maxfb); - } else { - malloc_init_state(av); - check_malloc_state(); - } -} - -/* - ------------------------------ realloc ------------------------------ -*/ - -#if __STD_C -Void_t *rEALLOc(Void_t *oldmem, size_t bytes) -#else -Void_t *rEALLOc(oldmem, bytes) -Void_t *oldmem; -size_t bytes; -#endif -{ - mstate av = get_malloc_state(); - - INTERNAL_SIZE_T nb; /* padded request size */ - - mchunkptr oldp; /* chunk corresponding to oldmem */ - INTERNAL_SIZE_T oldsize; /* its size */ - - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - Void_t *newmem; /* corresponding user mem */ - - mchunkptr next; /* next contiguous chunk after oldp */ - - mchunkptr remainder; /* extra space at end of newp */ - CHUNK_SIZE_T remainder_size; /* its size */ - - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - CHUNK_SIZE_T copysize; /* bytes to copy */ - unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ - INTERNAL_SIZE_T *s; /* copy source */ - INTERNAL_SIZE_T *d; /* copy destination */ - -#ifdef REALLOC_ZERO_BYTES_FREES - if (bytes == 0) { - fREe(oldmem); - return 0; - } -#endif - - /* realloc of null is supposed to be same as malloc */ - if (oldmem == 0) { - return mALLOc(bytes); - } - - checked_request2size(bytes, nb); - - oldp = mem2chunk(oldmem); - oldsize = chunksize(oldp); - - check_inuse_chunk(oldp); - 
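checked_request2size, called here and in malloc above, pads the request with header overhead and alignment and must trap requests so large that the padding would wrap past zero. A minimal sketch of such a guard, using illustrative constants rather than dlmalloc's real SIZE_SZ/MINSIZE/MALLOC_ALIGN_MASK macros:

#include <assert.h>
#include <stddef.h>

#define SIZE_SZ_DEMO sizeof(size_t)            /* per-chunk bookkeeping  */
#define ALIGN_MASK_DEMO (2 * SIZE_SZ_DEMO - 1) /* double-word alignment  */
#define MINSIZE_DEMO (4 * SIZE_SZ_DEMO)        /* smallest usable chunk  */

/* Pad a user request to a usable chunk size; 0 signals overflow. */
static size_t request2size_demo(size_t bytes) {
    if (bytes > (size_t)-1 - 2 * MINSIZE_DEMO) {
        return 0; /* padding would wrap around zero: trap it */
    }
    size_t nb = (bytes + SIZE_SZ_DEMO + ALIGN_MASK_DEMO) & ~ALIGN_MASK_DEMO;
    return nb < MINSIZE_DEMO ? MINSIZE_DEMO : nb;
}

int main(void) {
    assert(request2size_demo(1) == MINSIZE_DEMO);         /* minimum chunk  */
    assert(request2size_demo(100) % (ALIGN_MASK_DEMO + 1) == 0);
    assert(request2size_demo((size_t)-1) == 0);           /* wrap trapped   */
    return 0;
}

Returning 0 (rather than wrapping) is what lets callers such as rEALLOc fail cleanly on absurd sizes instead of handing back a tiny chunk.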
- if (!chunk_is_mmapped(oldp)) { - - if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) { - /* already big enough; split below */ - newp = oldp; - newsize = oldsize; - } - - else { - next = chunk_at_offset(oldp, oldsize); - - /* Try to expand forward into top */ - if (next == av->top && - (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= - (CHUNK_SIZE_T)(nb + MINSIZE)) { - set_head_size(oldp, nb); - av->top = chunk_at_offset(oldp, nb); - set_head(av->top, (newsize - nb) | PREV_INUSE); - return chunk2mem(oldp); - } - - /* Try to expand forward into next chunk; split off remainder below */ - else if (next != av->top && - !inuse(next) && - (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= - (CHUNK_SIZE_T)(nb)) { - newp = oldp; - unlink(next, bck, fwd); - } - - /* allocate, copy, free */ - else { - newmem = mALLOc(nb - MALLOC_ALIGN_MASK); - if (newmem == 0) { - return 0; /* propagate failure */ - } - - newp = mem2chunk(newmem); - newsize = chunksize(newp); - - /* - Avoid copy if newp is next chunk after oldp. - */ - if (newp == next) { - newsize += oldsize; - newp = oldp; - } else { - /* - Unroll copy of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - copysize = oldsize - SIZE_SZ; - s = (INTERNAL_SIZE_T *)(oldmem); - d = (INTERNAL_SIZE_T *)(newmem); - ncopies = copysize / sizeof(INTERNAL_SIZE_T); - assert(ncopies >= 3); - - if (ncopies > 9) { - MALLOC_COPY(d, s, copysize); - } - - else { - *(d + 0) = *(s + 0); - *(d + 1) = *(s + 1); - *(d + 2) = *(s + 2); - if (ncopies > 4) { - *(d + 3) = *(s + 3); - *(d + 4) = *(s + 4); - if (ncopies > 6) { - *(d + 5) = *(s + 5); - *(d + 6) = *(s + 6); - if (ncopies > 8) { - *(d + 7) = *(s + 7); - *(d + 8) = *(s + 8); - } - } - } - } - - fREe(oldmem); - check_inuse_chunk(newp); - return chunk2mem(newp); - } - } - } - - /* If possible, free extra space in old or extended chunk */ - - assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb)); - - remainder_size = newsize - nb; - - if (remainder_size < MINSIZE) { /* not enough extra to split off */ - set_head_size(newp, newsize); - set_inuse_bit_at_offset(newp, newsize); - } else { /* split remainder */ - remainder = chunk_at_offset(newp, nb); - set_head_size(newp, nb); - set_head(remainder, remainder_size | PREV_INUSE); - /* Mark remainder as inuse so free() won't complain */ - set_inuse_bit_at_offset(remainder, remainder_size); - fREe(chunk2mem(remainder)); - } - - check_inuse_chunk(newp); - return chunk2mem(newp); - } - - /* - Handle mmap cases - */ - - else { -#if HAVE_MMAP - -#if HAVE_MREMAP - INTERNAL_SIZE_T offset = oldp->prev_size; - size_t pagemask = av->pagesize - 1; - char *cp; - CHUNK_SIZE_T sum; - - /* Note the extra SIZE_SZ overhead */ - newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; - - /* don't need to remap if still within same page */ - if (oldsize == newsize - offset) { - return oldmem; - } - - cp = (char *)mremap((char *)oldp - offset, oldsize + offset, newsize, 1); - - if (cp != (char *)MORECORE_FAILURE) { - - newp = (mchunkptr)(cp + offset); - set_head(newp, (newsize - offset) | IS_MMAPPED); - - assert(aligned_OK(chunk2mem(newp))); - assert((newp->prev_size == offset)); - - /* update statistics */ - sum = av->mmapped_mem += newsize - oldsize; - if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) { - av->max_mmapped_mem = sum; - } - sum += av->sbrked_mem; - if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) { - av->max_total_mem = sum; - } - - return chunk2mem(newp); - } -#endif - - /* Note the extra 
SIZE_SZ overhead. */ - if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) { - newmem = oldmem; /* do nothing */ - } else { - /* Must alloc, copy, free. */ - newmem = mALLOc(nb - MALLOC_ALIGN_MASK); - if (newmem != 0) { - MALLOC_COPY(newmem, oldmem, oldsize - 2 * SIZE_SZ); - fREe(oldmem); - } - } - return newmem; - -#else - /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ - check_malloc_state(); - MALLOC_FAILURE_ACTION; - return 0; -#endif - } -} - -/* - ------------------------------ memalign ------------------------------ -*/ - -#if __STD_C -Void_t *mEMALIGn(size_t alignment, size_t bytes) -#else -Void_t *mEMALIGn(alignment, bytes) -size_t alignment; -size_t bytes; -#endif -{ - INTERNAL_SIZE_T nb; /* padded request size */ - char *m; /* memory returned by malloc call */ - mchunkptr p; /* corresponding chunk */ - char *brk; /* alignment point within p */ - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ - mchunkptr remainder; /* spare room at end to split off */ - CHUNK_SIZE_T remainder_size; /* its size */ - INTERNAL_SIZE_T size; - - /* If need less alignment than we give anyway, just relay to malloc */ - - if (alignment <= MALLOC_ALIGNMENT) { - return mALLOc(bytes); - } - - /* Otherwise, ensure that it is at least a minimum chunk size */ - - if (alignment < MINSIZE) { - alignment = MINSIZE; - } - - /* Make sure alignment is power of 2 (in case MINSIZE is not). */ - if ((alignment & (alignment - 1)) != 0) { - size_t a = MALLOC_ALIGNMENT * 2; - while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) { - a <<= 1; - } - alignment = a; - } - - checked_request2size(bytes, nb); - - /* - Strategy: find a spot within that chunk that meets the alignment - request, and then possibly free the leading and trailing space. - */ - - /* Call malloc with worst case padding to hit alignment. */ - - m = (char *)(mALLOc(nb + alignment + MINSIZE)); - - if (m == 0) { - return 0; /* propagate failure */ - } - - p = mem2chunk(m); - - if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */ - - /* - Find an aligned spot inside chunk. Since we need to give back - leading space in a chunk of at least MINSIZE, if the first - calculation places us at a spot with less than MINSIZE leader, - we can move to the next aligned spot -- we've allocated enough - total room so that this is always possible. 
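The alignment strategy being described works because alignment has already been forced up to a power of two: over-allocate by alignment + MINSIZE, then round the returned address up with a mask. A short standalone sketch of both steps, with stand-in helpers and uintptr_t arithmetic assumed:

#include <assert.h>
#include <stdint.h>

/* Round a requested alignment up to a power of two, starting from a
   floor, mirroring the doubling loop in mEMALIGn above. */
static uintptr_t pow2_at_least(uintptr_t want, uintptr_t start) {
    uintptr_t a = start;
    while (a < want) {
        a <<= 1;
    }
    return a;
}

/* Round an address up to a power-of-two boundary. */
static uintptr_t align_up(uintptr_t addr, uintptr_t alignment) {
    return (addr + alignment - 1) & ~(alignment - 1);
}

int main(void) {
    assert(pow2_at_least(48, 16) == 64); /* 48 is not a power of two */
    assert(align_up(1000, 64) == 1024);
    assert(align_up(1024, 64) == 1024);  /* already aligned: unchanged */
    return 0;
}

The mask trick is the same computation the code below performs with -((signed long)alignment), which equals ~(alignment - 1) for powers of two.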
- */ - - brk = (char *)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) & - -((signed long)alignment))); - if ((CHUNK_SIZE_T)(brk - (char *)(p)) < MINSIZE) { - brk += alignment; - } - - newp = (mchunkptr)brk; - leadsize = brk - (char *)(p); - newsize = chunksize(p) - leadsize; - - /* For mmapped chunks, just adjust offset */ - if (chunk_is_mmapped(p)) { - newp->prev_size = p->prev_size + leadsize; - set_head(newp, newsize | IS_MMAPPED); - return chunk2mem(newp); - } - - /* Otherwise, give back leader, use the rest */ - set_head(newp, newsize | PREV_INUSE); - set_inuse_bit_at_offset(newp, newsize); - set_head_size(p, leadsize); - fREe(chunk2mem(p)); - p = newp; - - assert(newsize >= nb && - (((PTR_UINT)(chunk2mem(p))) % alignment) == 0); - } - - /* Also give back spare room at the end */ - if (!chunk_is_mmapped(p)) { - size = chunksize(p); - if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(p, nb); - set_head(remainder, remainder_size | PREV_INUSE); - set_head_size(p, nb); - fREe(chunk2mem(remainder)); - } - } - - check_inuse_chunk(p); - return chunk2mem(p); -} - -/* - ------------------------------ calloc ------------------------------ -*/ - -#if __STD_C -Void_t *cALLOc(size_t n_elements, size_t elem_size) -#else -Void_t *cALLOc(n_elements, elem_size) -size_t n_elements; -size_t elem_size; -#endif -{ - mchunkptr p; - CHUNK_SIZE_T clearsize; - CHUNK_SIZE_T nclears; - INTERNAL_SIZE_T *d; - - Void_t *mem = mALLOc(n_elements * elem_size); - - if (mem != 0) { - p = mem2chunk(mem); - - if (!chunk_is_mmapped(p)) { - /* - Unroll clear of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - d = (INTERNAL_SIZE_T *)mem; - clearsize = chunksize(p) - SIZE_SZ; - nclears = clearsize / sizeof(INTERNAL_SIZE_T); - assert(nclears >= 3); - - if (nclears > 9) { - MALLOC_ZERO(d, clearsize); - } - - else { - *(d + 0) = 0; - *(d + 1) = 0; - *(d + 2) = 0; - if (nclears > 4) { - *(d + 3) = 0; - *(d + 4) = 0; - if (nclears > 6) { - *(d + 5) = 0; - *(d + 6) = 0; - if (nclears > 8) { - *(d + 7) = 0; - *(d + 8) = 0; - } - } - } - } - } -#if !MMAP_CLEARS - else { - d = (INTERNAL_SIZE_T *)mem; - /* - Note the additional SIZE_SZ - */ - clearsize = chunksize(p) - 2 * SIZE_SZ; - MALLOC_ZERO(d, clearsize); - } -#endif - } - return mem; -} - -/* - ------------------------------ cfree ------------------------------ -*/ - -#if __STD_C -void cFREe(Void_t *mem) -#else -void cFREe(mem) Void_t *mem; -#endif -{ - fREe(mem); -} - -/* - ------------------------- independent_calloc ------------------------- -*/ - -#if __STD_C -Void_t **iCALLOc(size_t n_elements, size_t elem_size, Void_t *chunks[]) -#else -Void_t **iCALLOc(n_elements, elem_size, chunks) -size_t n_elements; -size_t elem_size; -Void_t *chunks[]; -#endif -{ - size_t sz = elem_size; /* serves as 1-element array */ - /* opts arg of 3 means all elements are same size, and should be cleared */ - return iALLOc(n_elements, &sz, 3, chunks); -} - -/* - ------------------------- independent_comalloc ------------------------- -*/ - -#if __STD_C -Void_t **iCOMALLOc(size_t n_elements, size_t sizes[], Void_t *chunks[]) -#else -Void_t **iCOMALLOc(n_elements, sizes, chunks) -size_t n_elements; -size_t sizes[]; -Void_t *chunks[]; -#endif -{ - return iALLOc(n_elements, sizes, 0, chunks); -} - -/* - ------------------------------ ialloc ------------------------------ - ialloc provides common support for independent_X routines, handling all of 
- the combinations that can result. - - The opts arg has: - bit 0 set if all elements are same size (using sizes[0]) - bit 1 set if elements should be zeroed -*/ - -#if __STD_C -static Void_t **iALLOc(size_t n_elements, - size_t *sizes, - int opts, - Void_t *chunks[]) -#else -static Void_t **iALLOc(n_elements, sizes, opts, chunks) -size_t n_elements; -size_t *sizes; -int opts; -Void_t *chunks[]; -#endif -{ - mstate av = get_malloc_state(); - INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ - INTERNAL_SIZE_T contents_size; /* total size of elements */ - INTERNAL_SIZE_T array_size; /* request size of pointer array */ - Void_t *mem; /* malloced aggregate space */ - mchunkptr p; /* corresponding chunk */ - INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ - Void_t **marray; /* either "chunks" or malloced ptr array */ - mchunkptr array_chunk; /* chunk for malloced ptr array */ - int mmx; /* to disable mmap */ - INTERNAL_SIZE_T size; - size_t i; - - /* Ensure initialization */ - if (av->max_fast == 0) { - malloc_consolidate(av); - } - - /* compute array length, if needed */ - if (chunks != 0) { - if (n_elements == 0) { - return chunks; /* nothing to do */ - } - marray = chunks; - array_size = 0; - } else { - /* if empty req, must still return chunk representing empty array */ - if (n_elements == 0) { - return (Void_t **)mALLOc(0); - } - marray = 0; - array_size = request2size(n_elements * (sizeof(Void_t *))); - } - - /* compute total element size */ - if (opts & 0x1) { /* all-same-size */ - element_size = request2size(*sizes); - contents_size = n_elements * element_size; - } else { /* add up all the sizes */ - element_size = 0; - contents_size = 0; - for (i = 0; i != n_elements; ++i) { - contents_size += request2size(sizes[i]); - } - } - - /* subtract out alignment bytes from total to minimize overallocation */ - size = contents_size + array_size - MALLOC_ALIGN_MASK; - - /* - Allocate the aggregate chunk. - But first disable mmap so malloc won't use it, since - we would not be able to later free/realloc space internal - to a segregated mmap region. 
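For context, a hedged usage sketch of the wrappers above: independent_comalloc carves one underlying allocation into several independently freeable pieces. This assumes a build that exposes the unprefixed public name (USE_DL_PREFIX unset); the declaration below stands in for the real header:

#include <stdio.h>
#include <stdlib.h>

/* Assumed declaration; supplied by this header in an actual build. */
void **independent_comalloc(size_t n_elements, size_t sizes[], void *chunks[]);

struct header { int id; };
struct body { double payload; };

int main(void) {
    size_t sizes[2] = {sizeof(struct header), sizeof(struct body)};
    /* One aggregate allocation, split into two chunks plus the array. */
    void **parts = independent_comalloc(2, sizes, NULL);
    if (parts == NULL) {
        return 1;
    }
    struct header *h = parts[0];
    struct body *b = parts[1];
    h->id = 1;
    b->payload = 2.0;
    printf("%d %f\n", h->id, b->payload);
    /* Each element, and the pointer array itself, is separately freeable. */
    free(parts[0]);
    free(parts[1]);
    free((void *)parts);
    return 0;
}

Disabling mmap around the aggregate mALLOc call, as the code below does, is what guarantees these interior pieces are ordinary sbrk-backed chunks that free() can handle individually.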
- */ - mmx = av->n_mmaps_max; /* disable mmap */ - av->n_mmaps_max = 0; - mem = mALLOc(size); - av->n_mmaps_max = mmx; /* reset mmap */ - if (mem == 0) { - return 0; - } - - p = mem2chunk(mem); - assert(!chunk_is_mmapped(p)); - remainder_size = chunksize(p); - - if (opts & 0x2) { /* optionally clear the elements */ - MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); - } - - /* If not provided, allocate the pointer array as final part of chunk */ - if (marray == 0) { - array_chunk = chunk_at_offset(p, contents_size); - marray = (Void_t **)(chunk2mem(array_chunk)); - set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE); - remainder_size = contents_size; - } - - /* split out elements */ - for (i = 0;; ++i) { - marray[i] = chunk2mem(p); - if (i != n_elements - 1) { - if (element_size != 0) { - size = element_size; - } else { - size = request2size(sizes[i]); - } - remainder_size -= size; - set_head(p, size | PREV_INUSE); - p = chunk_at_offset(p, size); - } else { /* the final element absorbs any overallocation slop */ - set_head(p, remainder_size | PREV_INUSE); - break; - } - } - -#if DL_DEBUG - if (marray != chunks) { - /* final element must have exactly exhausted chunk */ - if (element_size != 0) { - assert(remainder_size == element_size); - } else { - assert(remainder_size == request2size(sizes[i])); - } - check_inuse_chunk(mem2chunk(marray)); - } - - for (i = 0; i != n_elements; ++i) { - check_inuse_chunk(mem2chunk(marray[i])); - } -#endif - - return marray; -} - -/* - ------------------------------ valloc ------------------------------ -*/ - -#if __STD_C -Void_t *vALLOc(size_t bytes) -#else -Void_t *vALLOc(bytes) -size_t bytes; -#endif -{ - /* Ensure initialization */ - mstate av = get_malloc_state(); - if (av->max_fast == 0) { - malloc_consolidate(av); - } - return mEMALIGn(av->pagesize, bytes); -} - -/* - ------------------------------ pvalloc ------------------------------ -*/ - -#if __STD_C -Void_t *pVALLOc(size_t bytes) -#else -Void_t *pVALLOc(bytes) -size_t bytes; -#endif -{ - mstate av = get_malloc_state(); - size_t pagesz; - - /* Ensure initialization */ - if (av->max_fast == 0) { - malloc_consolidate(av); - } - pagesz = av->pagesize; - return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); -} - -/* - ------------------------------ malloc_trim ------------------------------ -*/ - -#if __STD_C -int mTRIm(size_t pad) -#else -int mTRIm(pad) -size_t pad; -#endif -{ - mstate av = get_malloc_state(); - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - -#ifndef MORECORE_CANNOT_TRIM - return sYSTRIm(pad, av); -#else - return 0; -#endif -} - -/* - ------------------------- malloc_usable_size ------------------------- -*/ - -#if __STD_C -size_t mUSABLe(Void_t *mem) -#else -size_t mUSABLe(mem) -Void_t *mem; -#endif -{ - mchunkptr p; - if (mem != 0) { - p = mem2chunk(mem); - if (chunk_is_mmapped(p)) { - return chunksize(p) - 2 * SIZE_SZ; - } else if (inuse(p)) { - return chunksize(p) - SIZE_SZ; - } - } - return 0; -} - -/* - ------------------------------ mallinfo ------------------------------ -*/ - -struct mallinfo mALLINFo() { - mstate av = get_malloc_state(); - struct mallinfo mi; - int i; - mbinptr b; - mchunkptr p; - INTERNAL_SIZE_T avail; - INTERNAL_SIZE_T fastavail; - int nblocks; - int nfastblocks; - - /* Ensure initialization */ - if (av->top == 0) { - malloc_consolidate(av); - } - - check_malloc_state(); - - /* Account for top */ - avail = chunksize(av->top); - nblocks = 1; /* top always exists */ - - /* traverse fastbins */ - 
nfastblocks = 0; - fastavail = 0; - - for (i = 0; NFASTBINS - i > 0; ++i) { - for (p = av->fastbins[i]; p != 0; p = p->fd) { - ++nfastblocks; - fastavail += chunksize(p); - } - } - - avail += fastavail; - - /* traverse regular bins */ - for (i = 1; i < NBINS; ++i) { - b = bin_at(av, i); - for (p = last(b); p != b; p = p->bk) { - ++nblocks; - avail += chunksize(p); - } - } - - mi.smblks = nfastblocks; - mi.ordblks = nblocks; - mi.fordblks = avail; - mi.uordblks = av->sbrked_mem - avail; - mi.arena = av->sbrked_mem; - mi.hblks = av->n_mmaps; - mi.hblkhd = av->mmapped_mem; - mi.fsmblks = fastavail; - mi.keepcost = chunksize(av->top); - mi.usmblks = av->max_total_mem; - return mi; -} - -/* - ------------------------------ malloc_stats ------------------------------ -*/ - -void mSTATs(void) { - struct mallinfo mi = mALLINFo(); - -#ifdef WIN32 - { - CHUNK_SIZE_T free, reserved, committed; - vminfo(&free, &reserved, &committed); - fprintf(stderr, "free bytes = %10lu\n", - free); - fprintf(stderr, "reserved bytes = %10lu\n", - reserved); - fprintf(stderr, "committed bytes = %10lu\n", - committed); - } -#endif - - fprintf(stderr, "max system bytes = %10lu\n", - (CHUNK_SIZE_T)(mi.usmblks)); - fprintf(stderr, "system bytes = %10lu\n", - (CHUNK_SIZE_T)(mi.arena + mi.hblkhd)); - fprintf(stderr, "in use bytes = %10lu\n", - (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd)); - -#ifdef WIN32 - { - CHUNK_SIZE_T kernel, user; - if (cpuinfo(TRUE, &kernel, &user)) { - fprintf(stderr, "kernel ms = %10lu\n", - kernel); - fprintf(stderr, "user ms = %10lu\n", - user); - } - } -#endif -} - -/* - ------------------------------ mallopt ------------------------------ -*/ - -#if __STD_C -int mALLOPt(int param_number, int value) -#else -int mALLOPt(param_number, value) -int param_number; -int value; -#endif -{ - mstate av = get_malloc_state(); - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - - switch (param_number) { - case M_MXFAST: - if (value >= 0 && value <= MAX_FAST_SIZE) { - set_max_fast(av, value); - return 1; - } else { - return 0; - } - - case M_TRIM_THRESHOLD: - av->trim_threshold = value; - return 1; - - case M_TOP_PAD: - av->top_pad = value; - return 1; - - case M_MMAP_THRESHOLD: - av->mmap_threshold = value; - return 1; - - case M_MMAP_MAX: -#if !HAVE_MMAP - if (value != 0) { - return 0; - } -#endif - av->n_mmaps_max = value; - return 1; - - default: - return 0; - } -} - -/* - -------------------- Alternative MORECORE functions -------------------- -*/ - -/* - General Requirements for MORECORE. - - The MORECORE function must have the following properties: - - If MORECORE_CONTIGUOUS is false: - - * MORECORE must allocate in multiples of pagesize. It will - only be called with arguments that are multiples of pagesize. - - * MORECORE(0) must return an address that is at least - MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) - - else (i.e. If MORECORE_CONTIGUOUS is true): - - * Consecutive calls to MORECORE with positive arguments - return increasing addresses, indicating that space has been - contiguously extended. - - * MORECORE need not allocate in multiples of pagesize. - Calls to MORECORE need not have args of multiples of pagesize. - - * MORECORE need not page-align. - - In either case: - - * MORECORE may allocate more memory than requested. (Or even less, - but this will generally result in a malloc failure.) - - * MORECORE must not allocate memory when given argument zero, but - instead return one past the end address of memory from previous - nonzero call. 
This malloc does NOT call MORECORE(0) - until at least one call with positive arguments is made, so - the initial value returned is not important. - - * Even though consecutive calls to MORECORE need not return contiguous - addresses, it must be OK for malloc'ed chunks to span multiple - regions in those cases where they do happen to be contiguous. - - * MORECORE need not handle negative arguments -- it may instead - just return MORECORE_FAILURE when given negative arguments. - Negative arguments are always multiples of pagesize. MORECORE - must not misinterpret negative args as large positive unsigned - args. You can suppress all such calls from even occurring by defining - MORECORE_CANNOT_TRIM. - - There is some variation across systems about the type of the - argument to sbrk/MORECORE. If size_t is unsigned, then it cannot - actually be size_t, because sbrk supports negative args, so it is - normally the signed type of the same width as size_t (sometimes - declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much - matter though. Internally, we use "long" as arguments, which should - work across all reasonable possibilities. - - Additionally, if MORECORE ever returns failure for a positive - request, and HAVE_MMAP is true, then mmap is used as a noncontiguous - system allocator. This is a useful backup strategy for systems with - holes in address spaces -- in this case sbrk cannot contiguously - expand the heap, but mmap may be able to map noncontiguous space. - - If you'd like mmap to ALWAYS be used, you can define MORECORE to be - a function that always returns MORECORE_FAILURE. - - Malloc only has limited ability to detect failures of MORECORE - to supply contiguous space when it says it can. In particular, - multithreaded programs that do not use locks may result in - race conditions across calls to MORECORE that result in gaps - that cannot be detected as such, and subsequent corruption. - - If you are using this malloc with something other than sbrk (or its - emulation) to supply memory regions, you probably want to set - MORECORE_CONTIGUOUS as false. As an example, here is a custom - allocator kindly contributed for pre-OSX macOS. It uses virtually - but not necessarily physically contiguous non-paged memory (locked - in, present and won't get swapped out). You can use it by - uncommenting this section, adding some #includes, and setting up the - appropriate defines above: - - #define MORECORE osMoreCore - #define MORECORE_CONTIGUOUS 0 - - There is also a shutdown routine that should somehow be called for - cleanup upon program exit.
- - #define MAX_POOL_ENTRIES 100 - #define MINIMUM_MORECORE_SIZE (64 * 1024) - static int next_os_pool; - void *our_os_pools[MAX_POOL_ENTRIES]; - - void *osMoreCore(int size) - { - void *ptr = 0; - static void *sbrk_top = 0; - - if (size > 0) - { - if (size < MINIMUM_MORECORE_SIZE) - size = MINIMUM_MORECORE_SIZE; - if (CurrentExecutionLevel() == kTaskLevel) - ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); - if (ptr == 0) - { - return (void *) MORECORE_FAILURE; - } - // save ptrs so they can be freed during cleanup - our_os_pools[next_os_pool] = ptr; - next_os_pool++; - ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); - sbrk_top = (char *) ptr + size; - return ptr; - } - else if (size < 0) - { - // we don't currently support shrink behavior - return (void *) MORECORE_FAILURE; - } - else - { - return sbrk_top; - } - } - - // cleanup any allocated memory pools - // called as last thing before shutting down driver - - void osCleanupMem(void) - { - void **ptr; - - for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) - if (*ptr) - { - PoolDeallocate(*ptr); - *ptr = 0; - } - } - -*/ - -/* - -------------------------------------------------------------- - - Emulation of sbrk for win32. - Donated by J. Walter . - For additional information about this code, and malloc on Win32, see - http://www.genesys-e.de/jwalter/ -*/ - -#ifdef WIN32 - -#ifdef _DEBUG -/* #define TRACE */ -#endif - -/* Support for USE_MALLOC_LOCK */ -#ifdef USE_MALLOC_LOCK - -/* Wait for spin lock */ -static int slwait(int *sl) { - while (InterlockedCompareExchange((void **)sl, (void *)1, (void *)0) != 0) { - Sleep(0); - } - return 0; -} - -/* Release spin lock */ -static int slrelease(int *sl) { - InterlockedExchange(sl, 0); - return 0; -} - -#ifdef NEEDED -/* Spin lock for emulation code */ -static int g_sl; -#endif - -#endif /* USE_MALLOC_LOCK */ - -/* getpagesize for windows */ -static long getpagesize(void) { - static long g_pagesize = 0; - if (!g_pagesize) { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - g_pagesize = system_info.dwPageSize; - } - return g_pagesize; -} -static long getregionsize(void) { - static long g_regionsize = 0; - if (!g_regionsize) { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - g_regionsize = system_info.dwAllocationGranularity; - } - return g_regionsize; -} - -/* A region list entry */ -typedef struct _region_list_entry { - void *top_allocated; - void *top_committed; - void *top_reserved; - long reserve_size; - struct _region_list_entry *previous; -} region_list_entry; - -/* Allocate and link a region entry in the region list */ -static int region_list_append(region_list_entry **last, void *base_reserved, long reserve_size) { - region_list_entry *next = HeapAlloc(GetProcessHeap(), 0, sizeof(region_list_entry)); - if (!next) { - return FALSE; - } - next->top_allocated = (char *)base_reserved; - next->top_committed = (char *)base_reserved; - next->top_reserved = (char *)base_reserved + reserve_size; - next->reserve_size = reserve_size; - next->previous = *last; - *last = next; - return TRUE; -} -/* Free and unlink the last region entry from the region list */ -static int region_list_remove(region_list_entry **last) { - region_list_entry *previous = (*last)->previous; - if (!HeapFree(GetProcessHeap(), sizeof(region_list_entry), *last)) { - return FALSE; - } - *last = previous; - return TRUE; -} - -#define CEIL(size, to) (((size) + (to) - 1) & ~((to) - 1)) -#define FLOOR(size, to) ((size) & ~((to) - 1)) - -#define SBRK_SCALE 0 -/* 
#define SBRK_SCALE 1 */ -/* #define SBRK_SCALE 2 */ -/* #define SBRK_SCALE 4 */ - -/* sbrk for windows */ -static void *sbrk(long size) { - static long g_pagesize, g_my_pagesize; - static long g_regionsize, g_my_regionsize; - static region_list_entry *g_last; - void *result = (void *)MORECORE_FAILURE; -#ifdef TRACE - printf("sbrk %d\n", size); -#endif -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Wait for spin lock */ - slwait(&g_sl); -#endif - /* First time initialization */ - if (!g_pagesize) { - g_pagesize = getpagesize(); - g_my_pagesize = g_pagesize << SBRK_SCALE; - } - if (!g_regionsize) { - g_regionsize = getregionsize(); - g_my_regionsize = g_regionsize << SBRK_SCALE; - } - if (!g_last) { - if (!region_list_append(&g_last, 0, 0)) { - goto sbrk_exit; - } - } - /* Assert invariants */ - assert(g_last); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && - g_last->top_allocated <= g_last->top_committed); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && - g_last->top_committed <= g_last->top_reserved && - (unsigned)g_last->top_committed % g_pagesize == 0); - assert((unsigned)g_last->top_reserved % g_regionsize == 0); - assert((unsigned)g_last->reserve_size % g_regionsize == 0); - /* Allocation requested? */ - if (size >= 0) { - /* Allocation size is the requested size */ - long allocate_size = size; - /* Compute the size to commit */ - long to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; - /* Do we reach the commit limit? */ - if (to_commit > 0) { - /* Round size to commit */ - long commit_size = CEIL(to_commit, g_my_pagesize); - /* Compute the size to reserve */ - long to_reserve = (char *)g_last->top_committed + commit_size - (char *)g_last->top_reserved; - /* Do we reach the reserve limit? */ - if (to_reserve > 0) { - /* Compute the remaining size to commit in the current region */ - long remaining_commit_size = (char *)g_last->top_reserved - (char *)g_last->top_committed; - if (remaining_commit_size > 0) { - /* Assert preconditions */ - assert((unsigned)g_last->top_committed % g_pagesize == 0); - assert(0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); - { - /* Commit this */ - void *base_committed = VirtualAlloc(g_last->top_committed, remaining_commit_size, - MEM_COMMIT, PAGE_READWRITE); - /* Check returned pointer for consistency */ - if (base_committed != g_last->top_committed) { - goto sbrk_exit; - } - /* Assert postconditions */ - assert((unsigned)base_committed % g_pagesize == 0); -#ifdef TRACE - printf("Commit %p %d\n", base_committed, remaining_commit_size); -#endif - /* Adjust the regions commit top */ - g_last->top_committed = (char *)base_committed + remaining_commit_size; - } - } - { - /* Now we are going to search and reserve. 
*/ - int contiguous = -1; - int found = FALSE; - MEMORY_BASIC_INFORMATION memory_info; - void *base_reserved; - long reserve_size; - do { - /* Assume contiguous memory */ - contiguous = TRUE; - /* Round size to reserve */ - reserve_size = CEIL(to_reserve, g_my_regionsize); - /* Start with the current region's top */ - memory_info.BaseAddress = g_last->top_reserved; - /* Assert preconditions */ - assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); - assert(0 < reserve_size && reserve_size % g_regionsize == 0); - while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { - /* Assert postconditions */ - assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); -#ifdef TRACE - printf("Query %p %d %s\n", memory_info.BaseAddress, memory_info.RegionSize, - memory_info.State == MEM_FREE ? "FREE" : (memory_info.State == MEM_RESERVE ? "RESERVED" : (memory_info.State == MEM_COMMIT ? "COMMITTED" : "?"))); -#endif - /* Region is free, well aligned and big enough: we are done */ - if (memory_info.State == MEM_FREE && - (unsigned)memory_info.BaseAddress % g_regionsize == 0 && - memory_info.RegionSize >= (unsigned)reserve_size) { - found = TRUE; - break; - } - /* From now on we can't get contiguous memory! */ - contiguous = FALSE; - /* Recompute size to reserve */ - reserve_size = CEIL(allocate_size, g_my_regionsize); - memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; - /* Assert preconditions */ - assert((unsigned)memory_info.BaseAddress % g_pagesize == 0); - assert(0 < reserve_size && reserve_size % g_regionsize == 0); - } - /* Search failed? */ - if (!found) { - goto sbrk_exit; - } - /* Assert preconditions */ - assert((unsigned)memory_info.BaseAddress % g_regionsize == 0); - assert(0 < reserve_size && reserve_size % g_regionsize == 0); - /* Try to reserve this */ - base_reserved = VirtualAlloc(memory_info.BaseAddress, reserve_size, - MEM_RESERVE, PAGE_NOACCESS); - if (!base_reserved) { - int rc = GetLastError(); - if (rc != ERROR_INVALID_ADDRESS) { - goto sbrk_exit; - } - } - /* A null pointer signals (hopefully) a race condition with another thread. */ - /* In this case, we try again. */ - } while (!base_reserved); - /* Check returned pointer for consistency */ - if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress) { - goto sbrk_exit; - } - /* Assert postconditions */ - assert((unsigned)base_reserved % g_regionsize == 0); -#ifdef TRACE - printf("Reserve %p %d\n", base_reserved, reserve_size); -#endif - /* Did we get contiguous memory? */ - if (contiguous) { - long start_size = (char *)g_last->top_committed - (char *)g_last->top_allocated; - /* Adjust allocation size */ - allocate_size -= start_size; - /* Adjust the regions allocation top */ - g_last->top_allocated = g_last->top_committed; - /* Recompute the size to commit */ - to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; - /* Round size to commit */ - commit_size = CEIL(to_commit, g_my_pagesize); - } - /* Append the new region to the list */ - if (!region_list_append(&g_last, base_reserved, reserve_size)) { - goto sbrk_exit; - } - /* Didn't we get contiguous memory? 
*/ - if (!contiguous) { - /* Recompute the size to commit */ - to_commit = (char *)g_last->top_allocated + allocate_size - (char *)g_last->top_committed; - /* Round size to commit */ - commit_size = CEIL(to_commit, g_my_pagesize); - } - } - } - /* Assert preconditions */ - assert((unsigned)g_last->top_committed % g_pagesize == 0); - assert(0 < commit_size && commit_size % g_pagesize == 0); - { - /* Commit this */ - void *base_committed = VirtualAlloc(g_last->top_committed, commit_size, - MEM_COMMIT, PAGE_READWRITE); - /* Check returned pointer for consistency */ - if (base_committed != g_last->top_committed) { - goto sbrk_exit; - } - /* Assert postconditions */ - assert((unsigned)base_committed % g_pagesize == 0); -#ifdef TRACE - printf("Commit %p %d\n", base_committed, commit_size); -#endif - /* Adjust the regions commit top */ - g_last->top_committed = (char *)base_committed + commit_size; - } - } - /* Adjust the regions allocation top */ - g_last->top_allocated = (char *)g_last->top_allocated + allocate_size; - result = (char *)g_last->top_allocated - size; - /* Deallocation requested? */ - } else if (size < 0) { - long deallocate_size = -size; - /* As long as we have a region to release */ - while ((char *)g_last->top_allocated - deallocate_size < (char *)g_last->top_reserved - g_last->reserve_size) { - /* Get the size to release */ - long release_size = g_last->reserve_size; - /* Get the base address */ - void *base_reserved = (char *)g_last->top_reserved - release_size; - /* Assert preconditions */ - assert((unsigned)base_reserved % g_regionsize == 0); - assert(0 < release_size && release_size % g_regionsize == 0); - { - /* Release this */ - int rc = VirtualFree(base_reserved, 0, - MEM_RELEASE); - /* Check returned code for consistency */ - if (!rc) { - goto sbrk_exit; - } -#ifdef TRACE - printf("Release %p %d\n", base_reserved, release_size); -#endif - } - /* Adjust deallocation size */ - deallocate_size -= (char *)g_last->top_allocated - (char *)base_reserved; - /* Remove the old region from the list */ - if (!region_list_remove(&g_last)) { - goto sbrk_exit; - } - } - { - /* Compute the size to decommit */ - long to_decommit = (char *)g_last->top_committed - ((char *)g_last->top_allocated - deallocate_size); - if (to_decommit >= g_my_pagesize) { - /* Compute the size to decommit */ - long decommit_size = FLOOR(to_decommit, g_my_pagesize); - /* Compute the base address */ - void *base_committed = (char *)g_last->top_committed - decommit_size; - /* Assert preconditions */ - assert((unsigned)base_committed % g_pagesize == 0); - assert(0 < decommit_size && decommit_size % g_pagesize == 0); - { - /* Decommit this */ - int rc = VirtualFree((char *)base_committed, decommit_size, - MEM_DECOMMIT); - /* Check returned code for consistency */ - if (!rc) { - goto sbrk_exit; - } -#ifdef TRACE - printf("Decommit %p %d\n", base_committed, decommit_size); -#endif - } - /* Adjust deallocation size and regions commit and allocate top */ - deallocate_size -= (char *)g_last->top_allocated - (char *)base_committed; - g_last->top_committed = base_committed; - g_last->top_allocated = base_committed; - } - } - /* Adjust regions allocate top */ - g_last->top_allocated = (char *)g_last->top_allocated - deallocate_size; - /* Check for underflow */ - if ((char *)g_last->top_reserved - g_last->reserve_size > (char *)g_last->top_allocated || - g_last->top_allocated > g_last->top_committed) { - /* Adjust regions allocate top */ - g_last->top_allocated = (char *)g_last->top_reserved - g_last->reserve_size; - goto 
sbrk_exit; - } - result = g_last->top_allocated; - } - /* Assert invariants */ - assert(g_last); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_allocated && - g_last->top_allocated <= g_last->top_committed); - assert((char *)g_last->top_reserved - g_last->reserve_size <= (char *)g_last->top_committed && - g_last->top_committed <= g_last->top_reserved && - (unsigned)g_last->top_committed % g_pagesize == 0); - assert((unsigned)g_last->top_reserved % g_regionsize == 0); - assert((unsigned)g_last->reserve_size % g_regionsize == 0); - -sbrk_exit: -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Release spin lock */ - slrelease(&g_sl); -#endif - return result; -} - -/* mmap for windows */ -static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg) { - static long g_pagesize; - static long g_regionsize; -#ifdef TRACE - printf("mmap %d\n", size); -#endif -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Wait for spin lock */ - slwait(&g_sl); -#endif - /* First time initialization */ - if (!g_pagesize) { - g_pagesize = getpagesize(); - } - if (!g_regionsize) { - g_regionsize = getregionsize(); - } - /* Assert preconditions */ - assert((unsigned)ptr % g_regionsize == 0); - assert(size % g_pagesize == 0); - /* Allocate this */ - ptr = VirtualAlloc(ptr, size, - MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE); - if (!ptr) { - ptr = (void *)MORECORE_FAILURE; - goto mmap_exit; - } - /* Assert postconditions */ - assert((unsigned)ptr % g_regionsize == 0); -#ifdef TRACE - printf("Commit %p %d\n", ptr, size); -#endif -mmap_exit: -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Release spin lock */ - slrelease(&g_sl); -#endif - return ptr; -} - -/* munmap for windows */ -static long munmap(void *ptr, long size) { - static long g_pagesize; - static long g_regionsize; - int rc = MUNMAP_FAILURE; -#ifdef TRACE - printf("munmap %p %d\n", ptr, size); -#endif -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Wait for spin lock */ - slwait(&g_sl); -#endif - /* First time initialization */ - if (!g_pagesize) { - g_pagesize = getpagesize(); - } - if (!g_regionsize) { - g_regionsize = getregionsize(); - } - /* Assert preconditions */ - assert((unsigned)ptr % g_regionsize == 0); - assert(size % g_pagesize == 0); - /* Free this */ - if (!VirtualFree(ptr, 0, - MEM_RELEASE)) { - goto munmap_exit; - } - rc = 0; -#ifdef TRACE - printf("Release %p %d\n", ptr, size); -#endif -munmap_exit: -#if defined(USE_MALLOC_LOCK) && defined(NEEDED) - /* Release spin lock */ - slrelease(&g_sl); -#endif - return rc; -} - -static void vminfo(CHUNK_SIZE_T *free, CHUNK_SIZE_T *reserved, CHUNK_SIZE_T *committed) { - MEMORY_BASIC_INFORMATION memory_info; - memory_info.BaseAddress = 0; - *free = *reserved = *committed = 0; - while (VirtualQuery(memory_info.BaseAddress, &memory_info, sizeof(memory_info))) { - switch (memory_info.State) { - case MEM_FREE: - *free += memory_info.RegionSize; - break; - case MEM_RESERVE: - *reserved += memory_info.RegionSize; - break; - case MEM_COMMIT: - *committed += memory_info.RegionSize; - break; - } - memory_info.BaseAddress = (char *)memory_info.BaseAddress + memory_info.RegionSize; - } -} - -static int cpuinfo(int whole, CHUNK_SIZE_T *kernel, CHUNK_SIZE_T *user) { - if (whole) { - __int64 creation64, exit64, kernel64, user64; - int rc = GetProcessTimes(GetCurrentProcess(), - (FILETIME *)&creation64, - (FILETIME *)&exit64, - (FILETIME *)&kernel64, - (FILETIME *)&user64); - if (!rc) { - *kernel = 0; - *user = 0; - return 
FALSE; - } - *kernel = (CHUNK_SIZE_T)(kernel64 / 10000); - *user = (CHUNK_SIZE_T)(user64 / 10000); - return TRUE; - } else { - __int64 creation64, exit64, kernel64, user64; - int rc = GetThreadTimes(GetCurrentThread(), - (FILETIME *)&creation64, - (FILETIME *)&exit64, - (FILETIME *)&kernel64, - (FILETIME *)&user64); - if (!rc) { - *kernel = 0; - *user = 0; - return FALSE; - } - *kernel = (CHUNK_SIZE_T)(kernel64 / 10000); - *user = (CHUNK_SIZE_T)(user64 / 10000); - return TRUE; - } -} - -#endif /* WIN32 */ - -/* ------------------------------------------------------------ -History: - V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) - * Fix malloc_state bitmap array misdeclaration - - V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) - * Allow tuning of FIRST_SORTED_BIN_SIZE - * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. - * Better detection and support for non-contiguousness of MORECORE. - Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger - * Bypass most of malloc if no frees. Thanks to Emery Berger. - * Fix freeing of old top non-contiguous chunk in sysmalloc. - * Raised default trim and map thresholds to 256K. - * Fix mmap-related #defines. Thanks to Lubos Lunak. - * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. - * Branch-free bin calculation - * Default trim and mmap thresholds now 256K. - - V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) - * Introduce independent_comalloc and independent_calloc. - Thanks to Michael Pachos for motivation and help. - * Make optional .h file available - * Allow > 2GB requests on 32bit systems. - * new WIN32 sbrk, mmap, munmap, lock code from . - Thanks also to Andreas Mueller , - and Anonymous. - * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for - helping test this.) - * memalign: check alignment arg - * realloc: don't try to shift chunks backwards, since this - leads to more fragmentation in some programs and doesn't - seem to help in any others. - * Collect all cases in malloc requiring system memory into sYSMALLOc - * Use mmap as backup to sbrk - * Place all internal state in malloc_state - * Introduce fastbins (although similar to 2.5.1) - * Many minor tunings and cosmetic improvements - * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK - * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS - Thanks to Tony E. Bennett and others. - * Include errno.h to support default failure action. - - V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) - * return null for negative arguments - * Added several WIN32 cleanups from Martin C. Fong - * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' - (e.g. WIN32 platforms) - * Cleanup header file inclusion for WIN32 platforms - * Cleanup code to avoid Microsoft Visual C++ compiler complaints - * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing - memory allocation routines - * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) - * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to - usage of 'assert' in non-WIN32 code - * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to - avoid infinite loop - * Always call 'fREe()' rather than 'free()' - - V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) - * Fixed ordering problem with boundary-stamping - - V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) - * Added pvalloc, as recommended by H.J.
Liu - * Added 64bit pointer support mainly from Wolfram Gloger - * Added anonymously donated WIN32 sbrk emulation - * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen - * malloc_extend_top: fix mask error that caused wastage after - foreign sbrks - * Add linux mremap support code from HJ Liu - - V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) - * Integrated most documentation with the code. - * Add support for mmap, with help from - Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Use last_remainder in more cases. - * Pack bins using idea from colin@nyx10.cs.du.edu - * Use ordered bins instead of best-fit threshold - * Eliminate block-local decls to simplify tracing and debugging. - * Support another case of realloc via move into top - * Fix error occurring when initial sbrk_base not word-aligned. - * Rely on page size for units instead of SBRK_UNIT to - avoid surprises about sbrk alignment conventions. - * Add mallinfo, mallopt. Thanks to Raymond Nijssen - (raymond@es.ele.tue.nl) for the suggestion. - * Add `pad' argument to malloc_trim and top_pad mallopt parameter. - * More precautions for cases where other routines call sbrk, - courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Added macros etc., allowing use in linux libc from - H.J. Lu (hjl@gnu.ai.mit.edu) - * Inverted this history list - - V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) - * Re-tuned and fixed to behave more nicely with V2.6.0 changes. - * Removed all preallocation code since under current scheme - the work required to undo bad preallocations exceeds - the work saved in good cases for most test programs. - * No longer use return list or unconsolidated bins since - no scheme using them consistently outperforms those that don't - given above changes. - * Use best fit for very large chunks to prevent some worst-cases. - * Added some support for debugging - - V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) - * Removed footers when chunks are in use. Thanks to - Paul Wilson (wilson@cs.texas.edu) for the suggestion. - - V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) - * Added malloc_trim, with help from Wolfram Gloger - (wmglo@Dent.MED.Uni-Muenchen.DE). - - V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) - - V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) - * realloc: try to expand in both directions - * malloc: swap order of clean-bin strategy; - * realloc: only conditionally expand backwards - * Try not to scavenge used bins - * Use bin counts as a guide to preallocation - * Occasionally bin return list chunks in first scan - * Add a few optimizations from colin@nyx10.cs.du.edu - - V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) - * faster bin computation & slightly different binning - * merged all consolidations to one part of malloc proper - (eliminating old malloc_find_space & malloc_clean_bin) - * Scan 2 returns chunks (not just 1) - * Propagate failure in realloc if malloc returns 0 - * Add stuff to allow compilation on non-ANSI compilers - from kpv@research.att.com - - V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) - * removed potential for odd address access in prev_chunk - * removed dependency on getpagesize.h - * misc cosmetics and a bit more internal documentation - * anticosmetics: mangled names in macros to evade debugger strangeness - * tested on sparc, hp-700, dec-mips, rs6000 - with gcc & native cc (hp, dec only) allowing - Detlefs & Zorn comparison study (in SIGPLAN Notices.)
- - Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) - * Based loosely on libg++-1.2X malloc. (It retains some of the overall - structure of old version, but most details differ.) - -*/ - -#ifdef __cplusplus -}; /* end of extern "C" */ -#endif - -#endif /* MALLOC_270_H */