Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ raylib*/
box2d*/

# Temp Impulse Wars files
pufferlib/ocean/impulse_wars/*-debug/
pufferlib/ocean/impulse_wars/*-release/
pufferlib/ocean/impulse_wars/debug-*/
pufferlib/ocean/impulse_wars/release-*/
pufferlib/ocean/impulse_wars/benchmark/
ocean/impulse_wars/*-debug/
ocean/impulse_wars/*-release/
ocean/impulse_wars/debug-*/
ocean/impulse_wars/release-*/
ocean/impulse_wars/benchmark/

# Data
resources/drive/data/*
Expand Down
45 changes: 30 additions & 15 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,23 @@ if [ "$ENV" = "all" ]; then
exit 0
fi

STANDALONE_LDFLAGS=(-fuse-ld=lld)
SHARED_LDFLAGS=(-fuse-ld=lld)

# Linux/mac
PLATFORM="$(uname -s)"
if [ "$PLATFORM" = "Linux" ]; then
RAYLIB_NAME='raylib-5.5_linux_amd64'
OMP_LIB=-lomp5
SANITIZE_FLAGS=(-fsanitize=address,undefined,bounds,pointer-overflow,leak -fno-omit-frame-pointer)
STANDALONE_LDFLAGS=(-lGL)
SHARED_LDFLAGS=(-Bsymbolic-functions)
STANDALONE_LDFLAGS+=(-lGL)
SHARED_LDFLAGS+=(-Bsymbolic-functions)
else
RAYLIB_NAME='raylib-5.5_macos'
OMP_LIB=-lomp
SANITIZE_FLAGS=()
STANDALONE_LDFLAGS=(-framework Cocoa -framework IOKit -framework CoreVideo -framework OpenGL)
SHARED_LDFLAGS=(-framework Cocoa -framework OpenGL -framework IOKit -undefined dynamic_lookup)
STANDALONE_LDFLAGS+=(-framework Cocoa -framework IOKit -framework CoreVideo -framework OpenGL)
SHARED_LDFLAGS+=(-framework Cocoa -framework OpenGL -framework IOKit -undefined dynamic_lookup)
fi

CLANG_WARN=(
Expand All @@ -75,6 +78,7 @@ CLANG_WARN=(
-Wno-incompatible-pointer-types-discards-qualifiers
-Wno-error=array-parameter
)
CLANG_OPT=()

download() {
local name=$1 url=$2
Expand Down Expand Up @@ -108,14 +112,24 @@ elif [ "$ENV" = "trailer" ]; then
OUTPUT_NAME="trailer/trailer"
elif [ "$ENV" = "impulse_wars" ]; then
SRC_DIR="ocean/$ENV"
if [ "$MODE" = "web" ]; then BOX2D_NAME='box2d-web'
elif [ "$PLATFORM" = "Linux" ]; then BOX2D_NAME='box2d-linux-amd64'
else BOX2D_NAME='box2d-macos-arm64'
if [ "$MODE" = "web" ]; then
BOX2D_NAME='box2d-web'
elif [ "$PLATFORM" = "Linux" ]; then
BOX2D_NAME='box2d-linux-amd64'
else
BOX2D_NAME='box2d-macos-arm64'
fi

BOX2D_URL="https://github.com/capnspacehook/box2d/releases/latest/download"
download "$BOX2D_NAME" "$BOX2D_URL/$BOX2D_NAME.tar.gz"
INCLUDES+=(-I./$BOX2D_NAME/include -I./$BOX2D_NAME/src)
LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a")

if [ -z "$DEBUG" ]; then
CLANG_OPT+=(-flto -fno-math-errno -march=native)
LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2d.a")
else
LINK_ARCHIVES+=("./$BOX2D_NAME/libbox2dd.a")
fi
elif [ -d "ocean/$ENV" ]; then
SRC_DIR="ocean/$ENV"
else
Expand All @@ -126,13 +140,13 @@ OUTPUT_NAME=${OUTPUT_NAME:-$ENV}

# Standalone environment build
if [ -n "$DEBUG" ] || [ "$MODE" = "local" ]; then
CLANG_OPT=(-g -O0 "${CLANG_WARN[@]}" "${SANITIZE_FLAGS[@]}")
CLANG_OPT+=(-g -O0 "${CLANG_WARN[@]}" "${SANITIZE_FLAGS[@]}")
NVCC_OPT="-O0 -g"
LINK_OPT="-g"
else
CLANG_OPT=(-O2 -DNDEBUG "${CLANG_WARN[@]}")
NVCC_OPT="-O2 --threads 0"
LINK_OPT="-O2"
CLANG_OPT+=(-O3 -DNDEBUG "${CLANG_WARN[@]}")
NVCC_OPT="-O3 --threads 0"
LINK_OPT="-O3"
fi
if [ "$MODE" = "local" ] || [ "$MODE" = "fast" ]; then
FLAGS=(
Expand Down Expand Up @@ -242,6 +256,7 @@ echo "Compiling static library for $ENV..."
${CC:-clang} -c "${CLANG_OPT[@]}" $EXTRA_CFLAGS \
-I. -Isrc -I$SRC_DIR -Ivendor \
-I./$RAYLIB_NAME/include -I$CUDA_HOME/include \
"${INCLUDES[@]}" \
-DPLATFORM_DESKTOP \
-fno-semantic-interposition -fvisibility=hidden \
-fPIC -fopenmp \
Expand Down Expand Up @@ -273,7 +288,7 @@ if [ -z "$MODE" ]; then

LINK_CMD=(
${CXX:-g++} -shared -fPIC -fopenmp
build/bindings.o "$STATIC_LIB" "$RAYLIB_A"
build/bindings.o "$STATIC_LIB" "${LINK_ARCHIVES[@]}"
-L$CUDA_HOME/lib64 $CUDNN_LFLAG $NCCL_LFLAG
"${WHEEL_RPATH_FLAGS[@]}"
-lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand -lcudnn
Expand All @@ -298,7 +313,7 @@ elif [ "$MODE" = "cpu" ]; then
src/bindings_cpu.cpp -o build/bindings_cpu.o
LINK_CMD=(
${CXX:-g++} -shared -fPIC -fopenmp
build/bindings_cpu.o "$STATIC_LIB" "$RAYLIB_A"
build/bindings_cpu.o "$STATIC_LIB" "${LINK_ARCHIVES[@]}"
-lm -lpthread $OMP_LIB $LINK_OPT
"${SHARED_LDFLAGS[@]}"
-o "$OUTPUT"
Expand All @@ -317,7 +332,7 @@ elif [ "$MODE" = "profile" ]; then
$PRECISION \
-Xcompiler=-fopenmp \
tests/profile_kernels.cu vendor/ini.c \
"$STATIC_LIB" "$RAYLIB_A" \
"$STATIC_LIB" "${LINK_ARCHIVES[@]}" \
-lnccl -lnvidia-ml -lcublas -lcurand -lcudnn \
-lGL -lm -lpthread $OMP_LIB \
-o profile
Expand Down
142 changes: 12 additions & 130 deletions config/impulse_wars.ini
Original file line number Diff line number Diff line change
@@ -1,143 +1,25 @@
[base]
env_name = impulse_wars

max_suggestion_cost = 10_800

[policy]
hidden_size = 512
cnn_channels = 64

# These must match what's set in env below
continuous = False
num_drones = 2
is_training = True

[vec]
num_envs = 4
#num_workers = 4
#batch_size = 4

[env]
num_envs = 1024
num_drones = 2
num_agents = 1
enable_teams = False
sitting_duck = False
sitting_duck = True
continuous = False
is_training = True

reward_win = 2.0
reward_self_kill = -1.0
reward_enemy_death = 1.0
reward_enemy_kill = 1.0
reward_death = 0.0
reward_energy_emptied = -0.75
reward_weapon_pickup = 0.5
reward_shield_break = 0.5
reward_shot_hit_coef = 0.005
reward_explosion_hit_coef = 0.005

[train]
total_timesteps = 1_000_000_000
checkpoint_interval = 250

learning_rate = 0.005

compile = False
compile_mode = reduce-overhead
compile_fullgraph = False


[sweep]
downsample = 10
max_cost = 900

[sweep.env.num_envs]
distribution = uniform_pow2
min = 1
max = 1024
mean = 128
scale = auto

# reward parameters
[sweep.env.reward_win]
distribution = uniform
min = 0.0
mean = 2.0
max = 5.0
scale = auto

[sweep.env.reward_self_kill]
distribution = uniform
min = -3.0
mean = -1.0
max = 0.0
scale = auto

[sweep.env.reward_enemy_death]
distribution = uniform
min = 0.0
mean = 1.0
max = 3.0
scale = auto

[sweep.env.reward_kill]
distribution = uniform
min = 0.0
mean = 1.0
max = 3.0
scale = auto

[sweep.env.reward_death]
distribution = uniform
min = -1.0
mean = -0.25
max = 0.0
scale = auto

[sweep.env.reward_energy_emptied]
distribution = uniform
min = -2.0
mean = -0.75
max = 0.0
scale = auto

[sweep.env.reward_weapon_pickup]
distribution = uniform
min = 0.0
mean = 0.5
max = 3.0
scale = auto

[sweep.env.reward_shield_break]
distribution = uniform
min = 0.0
mean = 0.5
max = 3.0
scale = auto

[sweep.env.reward_shot_hit_coef]
distribution = log_normal
min = 0.0005
mean = 0.005
max = 0.05
scale = auto

[sweep.env.reward_explosion_hit_coef]
distribution = log_normal
min = 0.0005
mean = 0.005
max = 0.05
scale = auto

# hyperparameters
[sweep.train.total_timesteps]
distribution = log_normal
min = 250_000_000
max = 1_500_000_000
mean = 500_000_000
scale = time

[sweep.train.batch_size]
distribution = uniform_pow2
min = 65_536
max = 1_048_576
mean = 262_144
scale = auto

[sweep.train.horizon]
distribution = uniform_pow2
min = 64
max = 256
mean = 128
scale = auto

Loading
Loading