diff --git a/.github/workflows/ci-npu-test.yml b/.github/workflows/ci-npu-test.yml new file mode 100644 index 000000000..a81c15f5e --- /dev/null +++ b/.github/workflows/ci-npu-test.yml @@ -0,0 +1,299 @@ +name: Tests + +on: + push: + branches: [main, npu_ci] + paths-ignore: + - "docs_roll/**" + - "**/*.md" + - ".github/workflows/deploy.yml" + - ".github/workflows/daily-stats.yml" + pull_request: + branches: [main, npu_ci] + paths-ignore: + - "docs_roll/**" + - "**/*.md" + - ".github/workflows/deploy.yml" + - ".github/workflows/daily-stats.yml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-test: + name: Unit Tests (CPU) + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + cache-dependency-path: | + requirements_common.txt + mcore_adapter/pyproject.toml + mcore_adapter/requirements.txt + setup.py + pyproject.toml + + - name: Install dependencies + run: | + pip install --upgrade pip + # Install PyTorch CPU-only to keep CI lightweight + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + # Install core test dependencies (subset of requirements_common.txt) + pip install pytest pytest-timeout pytest-asyncio numpy tensordict pydantic dacite \ + more_itertools hydra-core omegaconf peft==0.12.0 datasets==3.1.0 \ + trl==0.9.6 transformers ray[default] sympy deprecated codetiming pybase64 imageio \ + jsonschema mcp gem-llm==0.0.4 gym 'gymnasium[toy-text]' gym_sokoban + # Install mcore_adapter and roll itself + pip install -e ./mcore_adapter + pip install -e . 
+ + - name: Run CPU-compatible unit tests + run: | + pytest tests/utils/test_action_parser.py \ + tests/utils/test_functionals.py \ + tests/utils/test_dynamic_batching.py \ + tests/utils/test_sequence_packing.py \ + tests/utils/test_taskgroups.py \ + tests/utils/test_cp_rmpad_ulysses_utils.py \ + tests/datasets/test_collator.py \ + tests/datasets/test_sampler.py \ + tests/agentic \ + tests/test_ref_worker_type_consistency.py \ + tests/distributed/scheduler/test_protocol.py \ + tests/distributed/scheduler/test_protocol_padding.py \ + tests/distributed/scheduler/test_decorator.py \ + tests/distributed/scheduler/test_resource_manager.py \ + -v --timeout=300 -x + env: + PYTHONPATH: ${{ github.workspace }} + ROLL_RUN_EXTERNAL_AGENTIC_TESTS: "0" + ROLL_RUN_AGENTIC_SANDBOX_TESTS: "0" + ROLL_RUN_AGENTIC_ENV_MANAGER_DEBUG_TESTS: "0" + + npu-test: + name: NPU Integration Tests + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository + runs-on: linux-aarch64-a3-8 + timeout-minutes: 120 + container: + # Pre-built NPU docker image (built from docker/Dockerfile.A3) with all deps pre-installed + image: swr.cn-north-4.myhuaweicloud.com/ascend-cicd/roll:main-a3 + env: + HF_ENDPOINT: https://hf-mirror.com + PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" + TASK_QUEUE_ENABLE: "2" + VLLM_USE_V1: "1" + # The CI vLLM smoke uses TP=1; FlashComm sequence parallelism requires TP>1. 
+ VLLM_ASCEND_ENABLE_FLASHCOMM: "0" + SGLANG_KERNEL_NPU_REPO: https://github.com/sgl-project/sgl-kernel-npu.git + SGLANG_KERNEL_NPU_BRANCH: main + SGLANG_KERNEL_NPU_CACHE_KEY: main + SGLANG_REPO: https://github.com/sgl-project/sglang.git + SGLANG_BRANCH: ifmn/eagle-dp-attn + SGLANG_CACHE_KEY: ifmn-eagle-dp-attn + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Cache NPU pip packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-npu-pip-${{ env.SGLANG_KERNEL_NPU_CACHE_KEY }}-${{ env.SGLANG_CACHE_KEY }}-${{ hashFiles('requirements_common.txt', 'mcore_adapter/pyproject.toml', 'mcore_adapter/requirements.txt', 'setup.py', 'pyproject.toml', '.github/workflows/ci-npu-test.yml') }} + restore-keys: | + ${{ runner.os }}-npu-pip-${{ env.SGLANG_KERNEL_NPU_CACHE_KEY }}-${{ env.SGLANG_CACHE_KEY }}- + ${{ runner.os }}-npu-pip-${{ env.SGLANG_CACHE_KEY }}- + ${{ runner.os }}-npu-pip- + + - name: Configure Ascend runtime + shell: bash + run: | + set -eo pipefail + if [ -f /usr/local/Ascend/ascend-toolkit/set_env.sh ]; then + source /usr/local/Ascend/ascend-toolkit/set_env.sh + fi + if [ -f /usr/local/Ascend/nnal/atb/set_env.sh ]; then + source /usr/local/Ascend/nnal/atb/set_env.sh + fi + + export ASCEND_HOME_PATH="${ASCEND_HOME_PATH:-/usr/local/Ascend/ascend-toolkit/latest}" + export ASCEND_TOOLKIT_HOME="${ASCEND_TOOLKIT_HOME:-${ASCEND_HOME_PATH}}" + export ASCEND_OPP_PATH="${ASCEND_OPP_PATH:-${ASCEND_HOME_PATH}/opp}" + export ASCEND_AICPU_PATH="${ASCEND_AICPU_PATH:-${ASCEND_HOME_PATH}}" + export LD_LIBRARY_PATH="/usr/local/Ascend/ascend-toolkit/latest/lib64:/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64:/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64/stub:/usr/local/Ascend/ascend-toolkit/latest/tools/hccl/lib64:/usr/local/Ascend/ascend-toolkit/latest/hccl/lib64:${LD_LIBRARY_PATH:-}" + + cann_python_paths=() + for path in \ + "${ASCEND_HOME_PATH}/python/site-packages" \ + 
"${ASCEND_HOME_PATH}/opp/built-in/op_impl/ai_core/tbe"; do + if [ -d "$path" ]; then + cann_python_paths+=("$path") + fi + done + if [ ${#cann_python_paths[@]} -gt 0 ]; then + export PYTHONPATH="$(IFS=:; echo "${cann_python_paths[*]}"):${PYTHONPATH:-}" + fi + + echo "ASCEND_HOME_PATH=${ASCEND_HOME_PATH}" >> "$GITHUB_ENV" + echo "ASCEND_TOOLKIT_HOME=${ASCEND_TOOLKIT_HOME}" >> "$GITHUB_ENV" + echo "ASCEND_OPP_PATH=${ASCEND_OPP_PATH}" >> "$GITHUB_ENV" + echo "ASCEND_AICPU_PATH=${ASCEND_AICPU_PATH}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> "$GITHUB_ENV" + echo "PYTHONPATH=${PYTHONPATH:-}" >> "$GITHUB_ENV" + echo "${ASCEND_HOME_PATH}/bin" >> "$GITHUB_PATH" + echo "${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin" >> "$GITHUB_PATH" + + - name: Show environment info + run: | + echo "=== Python ===" + python3 --version + python3 -m pip --version + echo "=== PyTorch ===" + python3 -c "import torch; print(f'torch={torch.__version__}')" + echo "=== NPU ===" + python3 -c " + import torch + import torch_npu + import importlib.util + + print(f'torch_npu={torch_npu.__version__}') + tbe_spec = importlib.util.find_spec('tbe') + print(f'tbe_module={tbe_spec is not None}') + if tbe_spec is None: + raise RuntimeError('CANN tbe Python module is not visible in PYTHONPATH') + for module_name in ('decorator', 'attrs', 'psutil', 'scipy', 'cloudpickle', 'tornado', 'ml_dtypes'): + module_spec = importlib.util.find_spec(module_name) + print(f'{module_name}_module={module_spec is not None}') + if not torch.npu.is_available(): + raise RuntimeError('torch.npu.is_available() is False') + print(f'npu_device_count={torch.npu.device_count()}') + " + echo "=== Ascend ===" + npu-smi info + + - name: Install pytest dependencies + run: | + pip install pytest-timeout + + - name: Install SGLang NPU kernel from source + shell: bash + run: | + set -eo pipefail + export SGLANG_KERNEL_NPU_SRC="/tmp/sgl-kernel-npu" + rm -rf "${SGLANG_KERNEL_NPU_SRC}" + git clone --depth 1 --branch 
"${SGLANG_KERNEL_NPU_BRANCH}" "${SGLANG_KERNEL_NPU_REPO}" "${SGLANG_KERNEL_NPU_SRC}" + cd "${SGLANG_KERNEL_NPU_SRC}" + python3 -m pip install pybind11 wheel + bash build.sh -a kernels + python3 -m pip install output/sgl_kernel_npu*.whl + python3 - <<'PY' + import sgl_kernel_npu + + print(f"sgl_kernel_npu={sgl_kernel_npu.__path__}") + PY + + - name: Install SGLang from source + shell: bash + run: | + set -eo pipefail + export SGLANG_SRC="/tmp/sglang" + rm -rf "${SGLANG_SRC}" + git clone --depth 1 --branch "${SGLANG_BRANCH}" "${SGLANG_REPO}" "${SGLANG_SRC}" + python3 - <<'PY' > "${SGLANG_SRC}/ci-requirements.txt" + import importlib.metadata + import os + import re + import tomllib + from pathlib import Path + + skip_packages = { + "cuda-python", + "flashinfer-cubin", + "flashinfer-python", + "nvidia-cutlass-dsl", + "nvidia-ml-py", + "sgl-kernel", + "torch", + "torch-memory-saver", + "torchaudio", + "torchao", + "torchcodec", + "torchvision", + "transformers", + } + + pyproject = Path(os.environ["SGLANG_SRC"]) / "python" / "pyproject.toml" + dependencies = tomllib.loads(pyproject.read_text())["project"]["dependencies"] + for dependency in dependencies: + package_name = re.split(r"[\[<>=!~; ]", dependency, maxsplit=1)[0] + package_name = package_name.replace("_", "-").lower() + if package_name in skip_packages: + continue + try: + importlib.metadata.version(package_name) + except importlib.metadata.PackageNotFoundError: + print(dependency) + PY + echo "Missing SGLang dependencies for CI:" + cat "${SGLANG_SRC}/ci-requirements.txt" + python3 -m pip install -r "${SGLANG_SRC}/ci-requirements.txt" + python3 -m pip install --no-deps -e "${SGLANG_SRC}/python" + python3 - <<'PY' + import importlib.metadata + + print(f"sglang={importlib.metadata.version('sglang')}") + PY + + - name: Install ROLL + run: | + pip install -e ./mcore_adapter + pip install -e . 
+ + - name: Show vLLM Ascend info + run: | + python3 - <<'PY' + import importlib.metadata + + import vllm + import vllm_ascend + from roll.platforms import current_platform + + for package_name in ("transformers", "deepspeed", "triton-ascend"): + try: + package_version = importlib.metadata.version(package_name) + except importlib.metadata.PackageNotFoundError: + package_version = "not installed" + print(f"{package_name}={package_version}") + + print(f"vllm={vllm.__version__}") + print(f"platform={current_platform.device_type}") + PY + + - name: Run remaining NPU-compatible unit tests + run: | + export PYTHONPATH="${GITHUB_WORKSPACE}:${PYTHONPATH:-}" + python3 -m pytest tests/third_party/sglang \ + tests/third_party/vllm \ + tests/datasets \ + tests/distributed \ + tests/models \ + tests/pipeline \ + tests/third_party/deepspeed \ + tests/utils/ \ + tests/test_ref_worker_type_consistency.py \ + --ignore=tests/models/cuda_mem \ + -v --timeout=600 -x + env: + ROLL_NPU_CI: "1" + DS_UNITTEST_TIMEOUT: "600" diff --git a/.gitignore b/.gitignore index 5e7b73d3c..7e6830569 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,4 @@ -# Ignore all png files *.png - -# But allow png files in static/img directory -!docs_roll/static/img/*.png *.pyc */checkpoint_dir */dataset diff --git a/README.md b/README.md index 34b38ffcd..4598eb318 100644 --- a/README.md +++ b/README.md @@ -41,30 +41,32 @@ Leveraging a multi-role distributed architecture with Ray for flexible resource ## 📢 News -| 📣 Updates | -|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **[01/01/2026]** 🎉 Our [Let It Flow: Agentic Crafting on Rock and Roll](https://arxiv.org/abs/2512.24873) report released! 
Introducing ALE ecosystem and ROME, an open-source agentic model with novel IPA algorithm. | -| **[11/08/2025]** 🎉 Our [ROCK: Reinforcement Open Construction Kit](https://github.com/alibaba/ROCK) released, Explore the new capabilities!. | -| **[10/23/2025]** 🎉 Our Papers released, see [Asymmetric Proximal Policy Optimization: mini-critics boost LLM reasoning](https://arxiv.org/abs/2510.01656) and [Attention Illuminates LLM Reasoning: The Preplan-and-Anchor Rhythm Enables Fine-Grained Policy Optimization](https://arxiv.org/abs/2510.13554). | -| **[10/14/2025]** 🎉 Our Paper released, see [Part II: ROLL Flash -- Accelerating RLVR and Agentic Training with Asynchrony](https://arxiv.org/abs/2510.11345). | -| **[09/28/2025]** 🎉 Ascend NPU support — see [usage guide](https://alibaba.github.io/ROLL/docs/User%20Guides/Hardware%20Support/ascend_usage). | -| **[09/25/2025]** 🎉 Our Paper released, see [RollPacker: Mitigating Long-Tail Rollouts for Fast, Synchronous RL Post-Training](https://arxiv.org/abs/2509.21009) | -| **[09/24/2025]** 🎉 Support [Wan2_2 Reward FL pipeline](examples/wan2.2-14B-reward_fl_ds/reward_fl_config.yaml). Explore the new capabilities! | -| **[09/23/2025]** 🎉 ROLL aligns with GEM environment definition, providing agentic Tool Use training capabilities, [ToolUse docs](docs_roll/docs/English/UserGuide/agentic/Tool_Use.md). | -| **[09/16/2025]** 🎉 Qwen3-Next model training is supported, refer to [configuration](examples/qwen3-next-80BA3B-rlvr_megatron/rlvr_config.yaml). | -| **[09/04/2025]** 🎉 ROLL supports vLLM dynamic FP8 rollout and remove_padding for acceleration. | -| **[08/28/2025]** 🎉 ROLL supports SFT pipeline, refer to [configuration](examples/qwen2.5-7B-sft_megatron/sft_config.yaml). | -| **[08/13/2025]** 🎉 ROLL supports AMD GPUs with out-of-box image docker and Dockerfile and specific yamls under `examples/` directory. Please refer to [Installation](https://alibaba.github.io/ROLL/docs/Getting%20Started/Installation/). 
| -| **[08/11/2025]** 🎉 Our Paper released, see [Part I: Tricks or Traps? A Deep Dive into RL for LLM Reasoning](https://arxiv.org/abs/2508.08221). | -| **[08/10/2025]** 🎉 Agentic RL supports [stepwise learning](examples/qwen2.5-0.5B-agentic/agent_val_frozen_lake_gigpo.yaml), like [GiGPO](https://arxiv.org/abs/2505.10978); Distill supports [VLM](examples/qwen2.5-vl-7B-distill/distill_vl_megatron.yaml). Explore the new capabilities! | -| **[08/06/2025]** 🎉 ROLL PPT is now available, [Slides](assets/ROLL%20高效且用户友好的大模型RL训练框架.pdf). | -| **[07/31/2025]** 🎉 Refactor agentic rl design. Support agentic rl [async training](examples/qwen2.5-0.5B-agentic/agent_val_frozen_lake_async.yaml). Explore the new capabilities! | -| **[07/31/2025]** 🎉 Support [DistillPipeline](examples/qwen2.5-7B-distill_megatron/run_distill_pipeline.sh)/[DpoPipeline](examples/qwen2.5-3B-dpo_megatron/run_dpo_pipeline.sh). Support [lora](examples/qwen2.5-7B-rlvr_megatron/rlvr_lora_zero3.yaml). Support [GSPO](https://arxiv.org/abs/2507.18071) | -| **[06/25/2025]** 🎉 Support thread env for env scaling and support [qwen2.5 VL agentic pipeline](examples/qwen2.5-vl-3B-agentic/agentic_val_sokoban.yaml). | -| **[06/13/2025]** 🎉 Support [Qwen2.5 VL rlvr pipeline](examples/qwen2.5-vl-7B-rlvr/rlvr_megatron.yaml) and upgrade mcore to 0.12 version. | -| **[06/09/2025]** 🎉 ROLL tech report is now available! Access the report [here](https://arxiv.org/abs/2506.06122). | -| **[06/08/2025]** 🎉Supports Qwen3([8B](examples/qwen3-8B-rlvr_megatron/rlvr_config.yaml)/14B/32B), Qwen3-MoE([30A3](examples/qwen3-30BA3B-rlvr_megatron/rlvr_config.yaml)/[235A22](examples/qwen3-235BA22B-rlvr_megatron/rlvr_config.yaml)), Qwen2.5([7B](examples/qwen2.5-7B-rlvr_megatron/rlvr_config.yaml)/14B/32B/72B) LLM models. | -| **[05/30/2025]** 🎉 Training [RLVR](examples/qwen2.5-7B-rlvr_megatron/rlvr_config.yaml) and [Agentic RL](examples/qwen2.5-0.5B-agentic/agent_val_frozen_lake.yaml) with ROLL is now available! Explore the new capabilities. 
| +| 📣 Updates | +|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **[03/06/2026]** 🎉 We support Qwen3.5 [Dense](examples/qwen3.5-35BA3-rlvr_megatron/rlvr_megatron_80GB.yaml) and [MoE](examples/qwen3.5-35BA3-rlvr_megatron/rlvr_megatron_80GB.yaml) series models and [on-policy distill](docs_roll/i18n/zh-Hans/docusaurus-plugin-content-docs/current/User Guides/Pipeline/on_policy_distill_pipeline_start.md). Welcome to use! | +| **[02/03/2026]** 🎉 We released FSDP2 Strategy, Megatron with LoRA, GPU partial overlapping, Qwen3-Omni supports and other features. For more details, please refer to the release notes. Welcome to use! | +| **[01/01/2026]** 🎉 Our [Let It Flow: Agentic Crafting on Rock and Roll](https://arxiv.org/abs/2512.24873) report released! Introducing ALE ecosystem and ROME, an open-source agentic model with novel IPA algorithm. | +| **[11/08/2025]** 🎉 Our [ROCK: Reinforcement Open Construction Kit](https://github.com/alibaba/ROCK) released, Explore the new capabilities!. | +| **[10/23/2025]** 🎉 Our Papers released, see [Asymmetric Proximal Policy Optimization: mini-critics boost LLM reasoning](https://arxiv.org/abs/2510.01656) and [Attention Illuminates LLM Reasoning: The Preplan-and-Anchor Rhythm Enables Fine-Grained Policy Optimization](https://arxiv.org/abs/2510.13554). | +| **[10/14/2025]** 🎉 Our Paper released, see [Part II: ROLL Flash -- Accelerating RLVR and Agentic Training with Asynchrony](https://arxiv.org/abs/2510.11345). | +| **[09/28/2025]** 🎉 Ascend NPU support — see [usage guide](https://alibaba.github.io/ROLL/docs/User%20Guides/Hardware%20Support/ascend_usage). 
| +| **[09/25/2025]** 🎉 Our Paper released, see [RollPacker: Mitigating Long-Tail Rollouts for Fast, Synchronous RL Post-Training](https://arxiv.org/abs/2509.21009) | +| **[09/24/2025]** 🎉 Support [Wan2_2 Reward FL pipeline](examples/wan2.2-14B-reward_fl_ds/reward_fl_config.yaml). Explore the new capabilities! | +| **[09/23/2025]** 🎉 ROLL aligns with GEM environment definition, providing agentic Tool Use training capabilities, [ToolUse docs](docs_roll/docs/English/UserGuide/agentic/Tool_Use.md). | +| **[09/16/2025]** 🎉 Qwen3-Next model training is supported, refer to [configuration](examples/qwen3-next-80BA3B-rlvr_megatron/rlvr_config.yaml). | +| **[09/04/2025]** 🎉 ROLL supports vLLM dynamic FP8 rollout and remove_padding for acceleration. | +| **[08/28/2025]** 🎉 ROLL supports SFT pipeline, refer to [configuration](examples/qwen2.5-7B-sft_megatron/sft_config.yaml). | +| **[08/13/2025]** 🎉 ROLL supports AMD GPUs with out-of-box image docker and Dockerfile and specific yamls under `examples/` directory. Please refer to [Installation](https://alibaba.github.io/ROLL/docs/Getting%20Started/Installation/). | +| **[08/11/2025]** 🎉 Our Paper released, see [Part I: Tricks or Traps? A Deep Dive into RL for LLM Reasoning](https://arxiv.org/abs/2508.08221). | +| **[08/10/2025]** 🎉 Agentic RL supports [stepwise learning](examples/qwen2.5-0.5B-agentic/agent_val_frozen_lake_gigpo.yaml), like [GiGPO](https://arxiv.org/abs/2505.10978); Distill supports [VLM](examples/qwen2.5-vl-7B-distill/distill_vl_megatron.yaml). Explore the new capabilities! | +| **[08/06/2025]** 🎉 ROLL PPT is now available, [Slides](assets/ROLL%20高效且用户友好的大模型RL训练框架.pdf). | +| **[07/31/2025]** 🎉 Refactor agentic rl design. Support agentic rl [async training](examples/qwen2.5-0.5B-agentic/agent_val_frozen_lake_async.yaml). Explore the new capabilities! 
| +| **[07/31/2025]** 🎉 Support [DistillPipeline](examples/qwen2.5-7B-distill_megatron/run_distill_pipeline.sh)/[DpoPipeline](examples/dpo_examples/run_dpo_pipeline.sh). Support [lora](examples/qwen2.5-7B-rlvr_megatron/rlvr_lora_zero3.yaml). Support [GSPO](https://arxiv.org/abs/2507.18071) | +| **[06/25/2025]** 🎉 Support thread env for env scaling and support [qwen2.5 VL agentic pipeline](examples/qwen2.5-vl-3B-agentic/agentic_val_sokoban.yaml). | +| **[06/13/2025]** 🎉 Support [Qwen2.5 VL rlvr pipeline](examples/qwen2.5-vl-7B-rlvr/rlvr_megatron.yaml) and upgrade mcore to 0.12 version. | +| **[06/09/2025]** 🎉 ROLL tech report is now available! Access the report [here](https://arxiv.org/abs/2506.06122). | +| **[06/08/2025]** 🎉Supports Qwen3([8B](examples/qwen3-8B-rlvr_megatron/rlvr_config.yaml)/14B/32B), Qwen3-MoE([30A3](examples/qwen3-30BA3B-rlvr_megatron/rlvr_config.yaml)/[235A22](examples/qwen3-235BA22B-rlvr_megatron/rlvr_config.yaml)), Qwen2.5([7B](examples/qwen2.5-7B-rlvr_megatron/rlvr_config.yaml)/14B/32B/72B) LLM models. | +| **[05/30/2025]** 🎉 Training [RLVR](examples/qwen2.5-7B-rlvr_megatron/rlvr_config.yaml) and [Agentic RL](examples/qwen2.5-0.5B-agentic/agent_val_frozen_lake.yaml) with ROLL is now available! Explore the new capabilities. 
| --- @@ -105,7 +107,7 @@ Leveraging a multi-role distributed architecture with Ray for flexible resource [RewardFL](https://alibaba.github.io/ROLL/docs/User%20Guides/Algorithms/Reward_FL) #### Backend -[DeepSpeed](https://alibaba.github.io/ROLL/docs/User%20Guides/Configuration/deepspeed) +[DeepSpeed](https://alibaba.github.io/ROLL/docs/User%20Guides/Configuration/deepspeed) [Megatron](https://alibaba.github.io/ROLL/docs/User%20Guides/Configuration/megatron) [vLLM](https://alibaba.github.io/ROLL/docs/User%20Guides/Configuration/vllm) [SGLang](https://alibaba.github.io/ROLL/docs/User%20Guides/Configuration/sglang) @@ -151,21 +153,13 @@ Leveraging a multi-role distributed architecture with Ray for flexible resource * DPO Pipeline * SFT Pipeline under development - - ---- - -## 🔮 Upcoming Features - -We are continuously working to expand ROLL's capabilities: -* ⏱️ **Async RLVR pipeline**: For even more efficient and streamlined asynchronous operations. -* ⚙️ **FSDP2**: Integrating the latest Fully Sharded Data Parallel techniques. -* 🔍 **Support DeepseekV3**: Adding compatibility for the newest Deepseek models. - --- ## 🏆 Notable work based on ROLL -- [SocioReasoner](https://github.com/AMAP-ML/SocioReasoner): A vision-language method for urban socio-semantic segmentation that employs a render-and-refine mechanism optimized by RL to identify abstract social entities using satellite and map data. +- [Freshness-Aware-PER](https://arxiv.org/abs/2604.16918): A freshness-aware prioritized experience replay framework for LLM/VLM reinforcement learning, combining reward magnitude with exponential age decay (`reward_fresh` priority) and asynchronous full-buffer refresh, providing fresher and higher-signal off-policy samples for both step- and trajectory-level agentic RL. 
[code](https://github.com/Vision-CAIR/Freshness-Aware-PER) +- [ComplementaryRL](https://arxiv.org/abs/2603.17621): Complementary RL is a learning framework that enables agents to effectively learn from experience through the seamless co-evolution of an experience extractor and a policy actor within the RL optimization loop. +- [RLix](https://github.com/rlops/rlix): RLix is an RL job manager that lets more RL jobs run concurrently with less waiting by sharing GPU capacity across jobs, while preserving each pipeline’s training behavior and improving GPU utilization. +- [TurningPoint-GRPO](https://arxiv.org/abs/2602.06422): A GRPO framework for Flow Matching models in text-to-image generation that alleviates step-wise reward sparsity by modeling step-level incremental rewards and explicitly captures long-term effects via turning points detection, providing dense learning signals for each denoising action. - [STAgent](https://arxiv.org/abs/2512.24957): An agentic LLM specialized for spatio-temporal understanding and complex tasks like constrained POI discovery and itinerary planning, featuring hierarchical data curation with 1:10,000 filter ratio and cascaded training (seed SFT + difficulty-aware SFT + RL), achieving strong performance on TravelBench while preserving general capabilities. - [IPRO](https://arxiv.org/abs/2510.14255): A novel video diffusion framework using reinforcement learning to enhance identity preservation in human-centric I2V generation, optimizing diffusion models with face identity scorer and KL-divergence regularization. - [TaoSR-SHE](https://arxiv.org/abs/2510.07972): Stepwise Hybrid Examination Reinforcement Learning Framework for Taobao Search Relevance, with SRPO (hybrid reward model + offline verifier), diversified data filtering, and multi-stage curriculum learning. 
diff --git a/data/deepeyes_mini_10.parquet b/data/deepeyes_mini_10.parquet new file mode 100644 index 000000000..d6ee3ef02 Binary files /dev/null and b/data/deepeyes_mini_10.parquet differ diff --git a/data/swe_bench_verified_example.jsonl b/data/swe_bench_verified_example.jsonl new file mode 100644 index 000000000..174852939 --- /dev/null +++ b/data/swe_bench_verified_example.jsonl @@ -0,0 +1,10 @@ +{"id": "222", "task_name": "sympy__sympy-12096", "category": "debugging", "prompt": "evalf does not call _imp_ recursively\nExample from https://stackoverflow.com/questions/41818842/why-cant-i-evaluate-a-composition-of-implemented-functions-in-sympy-at-a-point:\n\n```\n>>> from sympy.utilities.lambdify import implemented_function\n>>> f = implemented_function('f', lambda x: x ** 2)\n>>> g = implemented_function('g', lambda x: 2 * x)\n>>> print(f( 2 ).evalf())\n4.00000000000000\n>>> print( g(2) .evalf())\n4.00000000000000\n>>> print(f(g(2)).evalf())\nf(g(2))\n```\n\nThe code for this is in `Function._eval_evalf`. 
It isn't calling evalf recursively on the return of `_imp_`.\n", "sandbox_image": "slimshetty/swebench-verified:sweb.eval.x86_64.sympy__sympy-12096", "run_region": "", "start_script": "", "score": 1.0} +{"id": "476", "task_name": "django__django-12143", "category": "debugging", "prompt": "Possible data loss in admin changeform view when using regex special characters in formset prefix\nDescription\n\n\t\t(last modified by Baptiste Mispelon)\n\nWhile browsing the code in admin/options.py [1] (working on an unrelated ticket), I came across that line:\npk_pattern = re.compile(r'{}-\\d+-{}$'.format(prefix, self.model._meta.pk.name))\nGenerating a regex like this using string formatting can cause problems when the arguments contain special regex characters.\nself.model._meta.pk.name is probably safe (I'm not 100% sure about this) since it has to follow Python's syntax rules about identifiers.\nHowever prefix has no such restrictions [2] and could contain any number of special regex characters.\nThe fix is quite straightforward (use re.escape()) but it's hard to tell if there might be other occurrences of a similar pattern in Django's code.\nSome quick grepping (using git grep -E '(re_compile|re\\.(compile|search|match))' -- 'django/**.py') currently yields about 200 results. 
I had a superficial glance through the list and didn't spot other instances of the same usage pattern.\nEDIT I forgot to mention, but this bug is technically a regression (introduced in b18650a2634890aa758abae2f33875daa13a9ba3).\n[1] \u200bhttps://github.com/django/django/blob/ef93fd4683645635d3597e17c23f9ed862dd716b/django/contrib/admin/options.py#L1634\n[2] \u200bhttps://docs.djangoproject.com/en/dev/topics/forms/formsets/#customizing-a-formset-s-prefix\n", "sandbox_image": "slimshetty/swebench-verified:sweb.eval.x86_64.django__django-12143", "run_region": "", "start_script": "", "score": 1.0} +{"id": "463", "task_name": "django__django-11951", "category": "debugging", "prompt": "bulk_create batch_size param overrides the compatible batch size calculation\nDescription\n\n\t\t(last modified by Ahmet Kucuk)\n\nAt this line: \u200bhttps://github.com/django/django/blob/stable/2.2.x/django/db/models/query.py#L1197\nbatch_size param overrides compatible batch size calculation. This looks like a bug as bulk_update properly picks the minimum of two:\n\u200bhttps://github.com/django/django/blob/stable/2.2.x/django/db/models/query.py#L504\nI suggest using similar\n batch_size = min(batch_size, max_batch_size) if batch_size else max_batch_size\nlogic in bulk_create as well. I am happy to open a PR for it.\n", "sandbox_image": "slimshetty/swebench-verified:sweb.eval.x86_64.django__django-11951", "run_region": "", "start_script": "", "score": 1.0} +{"id": "216", "task_name": "django__django-15103", "category": "debugging", "prompt": "Make the element_id argument of json_script optional\nDescription\n\nI recently had a use-case where I wanted to use json_script but I didn't need any id for it (I was including the