neuralmagic · Chibukach · May 11, 2026 · May 14, 2026 · Jun 11, 2026
diff --git a/README.md b/README.md
@@ -291,13 +291,14 @@ uv run python post_codegen.py
 
 ## 🔌 Eval Converters
 
-We have prepared converters to make adapting to our schema as easy as possible. At the moment, we support converting local evaluation harness logs from `Inspect AI`, `HELM` and `lm-evaluation-harness` into our unified schema. Each converter produces aggregate JSON and optionally instance-level JSONL output.
+We have prepared converters to make adapting to our schema as easy as possible. At the moment, we support converting local evaluation harness logs from `Inspect AI`, `HELM`, `lm-evaluation-harness`, and `lighteval` into our unified schema. Each converter produces aggregate JSON and optionally instance-level JSONL output.
 
 | Framework | Command | Instance-Level JSONL |
 |---|---|---|
 | [Inspect AI](every_eval_ever/converters/inspect/) | `every_eval_ever convert inspect --log_path <path>` | Yes, if samples in log |
 | [HELM](every_eval_ever/converters/helm/) | `every_eval_ever convert helm --log_path <path>` | Always |
 | [lm-evaluation-harness](every_eval_ever/converters/lm_eval/) | `every_eval_ever convert lm_eval --log_path <path> --include_samples` | With `--include_samples` |
+| [lighteval](every_eval_ever/converters/lighteval/) | `every_eval_ever convert lighteval --log_path <path>` | No |
 
 For full CLI usage and required input files, see the [Eval Converters README](every_eval_ever/converters/README.md).
 

diff --git a/every_eval_ever/cli.py b/every_eval_ever/cli.py
@@ -108,6 +108,33 @@ def _cmd_convert_lm_eval(args: argparse.Namespace) -> int:
     return 0
 
 
+def _cmd_convert_lighteval(args: argparse.Namespace) -> int:
+    """Convert a lighteval results file or directory via LightEvalAdapter."""
+    from every_eval_ever.converters.lighteval.adapter import LightEvalAdapter
+
+    converter = LightEvalAdapter()
+    metadata = _common_metadata(args)
+    # Engine overrides are optional; falsy values (None, '') are not recorded.
+    for key in ('inference_engine', 'inference_engine_version'):
+        if getattr(args, key):
+            metadata[key] = getattr(args, key)
+
+    source = Path(args.log_path)
+    if source.is_file():
+        logs = converter.transform_from_file(source, metadata)
+    elif source.is_dir():
+        logs = converter.transform_from_directory(source, metadata)
+    else:
+        raise FileNotFoundError(f'Path is not a file or directory: {source}')
+
+    destination = Path(args.output_dir)
+    for log in logs:
+        print(_write_log(log, destination, eval_uuid=str(uuid.uuid4())))
+
+    print(f'Converted {len(logs)} evaluation log(s).')
+    return 0
+
+
 def _cmd_convert_inspect(args: argparse.Namespace) -> int:
     from every_eval_ever.converters.inspect.adapter import (
         InspectAIAdapter,
@@ -241,6 +268,7 @@ def build_parser() -> argparse.ArgumentParser:
         epilog=(
             'Examples:\n'
             '  every_eval_ever convert lm_eval --log_path results.json --output_dir data\n'
+            '  every_eval_ever convert lighteval --log_path results_run_dir --output_dir data\n'
             '  every_eval_ever convert inspect --log_path inspect_log.json --output_dir data\n'
             '  every_eval_ever convert helm --log_path helm_run_dir --output_dir data'
         ),
@@ -298,7 +326,13 @@ def build_parser() -> argparse.ArgumentParser:
         dest='source', required=True
     )
 
-    for source in ['lm_eval', 'inspect', 'helm', 'alpaca_eval']:
+    for source in [
+        'lm_eval',
+        'lighteval',
+        'inspect',
+        'helm',
+        'alpaca_eval',
+    ]:
         source_parser = convert_subparsers.add_parser(
             source,
             help=f'Convert {source} logs',
@@ -385,6 +419,20 @@ def build_parser() -> argparse.ArgumentParser:
                 help='Inference engine version to record in model_info.inference_engine.version.',
             )
 
+        if source == 'lighteval':
+            source_parser.add_argument(
+                '--inference_engine',
+                '--inference-engine',
+                default=None,
+                help='Override inferred inference engine (e.g. vllm, transformers).',
+            )
+            source_parser.add_argument(
+                '--inference_engine_version',
+                '--inference-engine-version',
+                default=None,
+                help='Inference engine version to record in model_info.inference_engine.version.',
+            )
+
     return parser
 
 
@@ -415,6 +463,8 @@ def main(argv: list[str] | None = None) -> int:
     if args.command == 'convert':
         if args.source == 'lm_eval':
             return _cmd_convert_lm_eval(args)
+        if args.source == 'lighteval':
+            return _cmd_convert_lighteval(args)
         if args.source == 'inspect':
             return _cmd_convert_inspect(args)
         if args.source == 'helm':

diff --git a/every_eval_ever/converters/README.md b/every_eval_ever/converters/README.md
@@ -1,5 +1,5 @@
 ## Automatic Evaluation Log Converters
-A collection of scripts to convert evaluation logs from local evaluation frameworks (e.g., `Inspect AI` and `lm-eval-harness`) and public leaderboards (e.g., AlpacaEval) into the unified Every Eval Ever schema.
+A collection of scripts to convert evaluation logs from local evaluation frameworks (e.g., `Inspect AI`, `lm-eval-harness`, and `lighteval`) and public leaderboards (e.g., AlpacaEval) into the unified Every Eval Ever schema.
 
 ### Installation
 
@@ -171,7 +171,6 @@ Using the `--log_path` argument, you can run a command like this:
 uv run every_eval_ever convert lm_eval --log_path tests/data/lm_eval/results_2026-01-21T03-44-18.458309.json
 ```
 
-
 Full manual for conversion of your own lm-eval evaluation log into unified is available below:
 
 ```bash
@@ -217,6 +216,75 @@ options:
                         Version of the evaluation library
 ```
 
+## lighteval
+
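+The converter from `lighteval` evaluation logs to the unified schema can be run through the `every_eval_ever convert lighteval` command, or as a module (`python -m every_eval_ever.converters.lighteval`, which executes `every_eval_ever/converters/lighteval/__main__.py`).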
+
+Using the `--log_path` argument, you can provide either:
+- A lighteval results JSON file (e.g., `results_2026-05-11T07-33-49.106520.json`)
+- A directory containing multiple lighteval results files
+
+The command for converting a lighteval evaluation log:
+
+```bash
+uv run every_eval_ever convert lighteval --log_path tests/data/lighteval/results_2026-05-11T07-33-49.106520.json
+```
+
+To convert all lighteval results in a directory:
+
+```bash
+uv run every_eval_ever convert lighteval --log_path path/to/lighteval/results/directory
+```
+
+The lighteval adapter automatically:
+- Parses model provider and pretrained model name from the `model_name` field
+- Extracts generation parameters from `model_config.generation_parameters`
+- Handles tasks with seed suffixes (e.g., `aime25|0`) by extracting the base task name
+- Infers inference engine from the model provider (can be overridden with `--inference_engine`)
+- Resolves metric configurations including bounds and directionality (`higher_is_better`)
+
+The full manual for converting your own lighteval evaluation log into the unified schema is available below:
+
+```bash
+usage: __main__.py [-h] --log_path LOG_PATH [--output_dir OUTPUT_DIR]
+                   [--source_organization_name SOURCE_ORGANIZATION_NAME]
+                   [--evaluator_relationship {first_party,third_party,collaborative,other}]
+                   [--source_organization_url SOURCE_ORGANIZATION_URL]
+                   [--source_organization_logo_url SOURCE_ORGANIZATION_LOGO_URL]
+                   [--inference_engine INFERENCE_ENGINE]
+                   [--inference_engine_version INFERENCE_ENGINE_VERSION]
+                   [--eval_library_name EVAL_LIBRARY_NAME]
+                   [--eval_library_version EVAL_LIBRARY_VERSION]
+
+Convert lighteval output to every_eval_ever format
+
+options:
+  -h, --help            show this help message and exit
+  --log_path LOG_PATH   Path to lighteval results JSON file or directory
+                        containing results files
+  --output_dir OUTPUT_DIR
+                        Output directory for converted files
+  --source_organization_name SOURCE_ORGANIZATION_NAME
+                        Name of the organization that ran the evaluation
+  --evaluator_relationship {first_party,third_party,collaborative,other}
+                        Relationship of the evaluator to the model
+  --source_organization_url SOURCE_ORGANIZATION_URL
+                        URL of the source organization
+  --source_organization_logo_url SOURCE_ORGANIZATION_LOGO_URL
+                        Logo of the source organization
+  --inference_engine INFERENCE_ENGINE
+                        Override inferred inference engine (e.g. 'vllm',
+                        'transformers'). Auto-detected from model provider when
+                        possible.
+  --inference_engine_version INFERENCE_ENGINE_VERSION
+                        Inference engine version (e.g. '0.6.0'). Not available
+                        from lighteval logs, so must be provided manually.
+  --eval_library_name EVAL_LIBRARY_NAME
+                        Name of the evaluation library (default: lighteval)
+  --eval_library_version EVAL_LIBRARY_VERSION
+                        Version of the evaluation library
+```
+
 ## AlpacaEval
 
 The AlpacaEval converter fetches the public leaderboard CSV directly from GitHub

diff --git a/every_eval_ever/converters/common/adapter.py b/every_eval_ever/converters/common/adapter.py
@@ -31,6 +31,7 @@ class SupportedLibrary(Enum):
     LM_EVAL = 'lm-evaluation-harness'
     INSPECT_AI = 'inspect-ai'
     HELM = 'helm'
+    LIGHTEVAL = 'lighteval'
     CUSTOM = 'custom'
 
 

diff --git a/every_eval_ever/converters/lighteval/__init__.py b/every_eval_ever/converters/lighteval/__init__.py
@@ -0,0 +1 @@
+"""lighteval adapter for every_eval_ever."""
diff --git a/every_eval_ever/converters/lighteval/__main__.py b/every_eval_ever/converters/lighteval/__main__.py
@@ -0,0 +1,142 @@
+"""CLI for converting lighteval output to every_eval_ever format."""
+
+import argparse
+import json
+import sys
+import uuid
+from pathlib import Path
+
+from .adapter import LightEvalAdapter
+
+
+def main():
+    """Convert lighteval results into every_eval_ever JSON files.
+
+    Forwards all parsed metadata options to the adapter, then writes one
+    ``<uuid>.json`` per log under ``output_dir/<eval>/<developer>/<model>/``.
+    Exits with status 1 if ``--log_path`` is neither a file nor a directory.
+    """
+    parser = argparse.ArgumentParser(
+        description='Convert lighteval output to every_eval_ever format'
+    )
+    parser.add_argument(
+        '--log_path',
+        type=str,
+        required=True,
+        help='Path to results JSON file or directory containing results files',
+    )
+    parser.add_argument(
+        '--output_dir',
+        type=str,
+        default='data',
+        help='Output directory for converted files',
+    )
+    parser.add_argument(
+        '--source_organization_name',
+        type=str,
+        default='',
+        help='Name of the organization that ran the evaluation',
+    )
+    parser.add_argument(
+        '--evaluator_relationship',
+        type=str,
+        default='first_party',
+        choices=['first_party', 'third_party', 'collaborative', 'other'],
+        help='Relationship of the evaluator to the model',
+    )
+    parser.add_argument(
+        '--source_organization_url',
+        type=str,
+        default=None,
+        help='URL of the source organization',
+    )
+    parser.add_argument(
+        '--source_organization_logo_url',
+        type=str,
+        default=None,
+        help='Logo of the source organization',
+    )
+    parser.add_argument(
+        '--inference_engine',
+        type=str,
+        default=None,
+        help="Override inference engine name (e.g. 'vllm', 'transformers'). "
+        'Auto-detected from provider when possible.',
+    )
+    parser.add_argument(
+        '--inference_engine_version',
+        type=str,
+        default=None,
+        help="Inference engine version (e.g. '0.6.0'). "
+        'Not available from lighteval logs, so must be provided manually.',
+    )
+    parser.add_argument(
+        '--eval_library_name',
+        type=str,
+        default='lighteval',
+        help='Name of the evaluation library (e.g. inspect_ai, lm_eval, helm, lighteval)',
+    )
+    parser.add_argument(
+        '--eval_library_version',
+        type=str,
+        default='unknown',
+        help='Version of the evaluation library. It should be extracted in the adapter if available in the evaluation log.',
+    )
+
+    args = parser.parse_args()
+
+    adapter = LightEvalAdapter()
+    output_dir = Path(args.output_dir)
+
+    metadata_args = {
+        'source_organization_name': args.source_organization_name,
+        'evaluator_relationship': args.evaluator_relationship,
+        'source_organization_url': args.source_organization_url,
+        'source_organization_logo_url': args.source_organization_logo_url,
+        'eval_library_name': args.eval_library_name,
+        'eval_library_version': args.eval_library_version,
+    }
+    for key in ('inference_engine', 'inference_engine_version'):
+        if getattr(args, key):
+            metadata_args[key] = getattr(args, key)
+
+    log_path = Path(args.log_path)
+    if log_path.is_file():
+        logs = adapter.transform_from_file(log_path, metadata_args)
+    elif log_path.is_dir():
+        logs = adapter.transform_from_directory(log_path, metadata_args)
+    else:
+        print(f'Error: {log_path} is not a file or directory', file=sys.stderr)
+        sys.exit(1)
+
+    for log in logs:
+        # Organize as: output_dir/{evaluation_name}/{developer}/{model_name}/{uuid}.json
+        # Use the first evaluation result's name (before any /filter suffix) as the task name
+        if log.evaluation_results:
+            eval_name = log.evaluation_results[0].evaluation_name.split('/')[0]
+        else:
+            eval_name = 'unknown'
+
+        # Ids shaped "<developer>/<name>" are split once; others get 'unknown'.
+        developer, sep, model_name = log.model_info.id.partition('/')
+        if not sep:
+            developer, model_name = 'unknown', log.model_info.id
+
+        out_path = output_dir / eval_name / developer / model_name
+        out_path.mkdir(parents=True, exist_ok=True)
+
+        eval_uuid = str(uuid.uuid4())
+        out_file = out_path / f'{eval_uuid}.json'
+
+        with open(out_file, 'w', encoding='utf-8') as f:
+            json.dump(
+                log.model_dump(mode='json', exclude_none=True), f, indent=2
+            )
+
+        print(f'  {out_file}')
+
+    print(f'\nConverted {len(logs)} evaluation log(s).')
+
+
+if __name__ == '__main__':
+    main()