# Excerpt from python/subunit/__init__.py.
#
# The accompanying packaging change registers the console script in
# pyproject.toml:
#     "junitxml2subunit" = "subunit.filter_scripts.junitxml2subunit:main"


def JUnitXML2SubUnit(xml_files, output_stream):
    """Convert JUnit XML test reports to a subunit v2 byte stream.

    Reads each path in ``xml_files`` (in the supplied order) and emits
    one subunit packet pair per ``<testcase>`` element. The packet pair
    is ``inprogress`` followed by the terminal status, with synthetic
    timestamps spaced by the testcase's ``time`` attribute so consumers
    can recover the recorded duration.

    Test IDs are formed as ``<classname>::<name>`` from the testcase's
    ``classname`` and ``name`` attributes. Maven Surefire and Gradle
    both populate ``classname`` with the fully-qualified Java class
    (e.g. ``com.example.FooTest``), so the resulting ID is
    ``com.example.FooTest::testBar``. A testcase without ``classname``
    uses the bare ``name``; a testcase without ``name`` is skipped.

    Mapping rules:
      * ``<failure>`` child → status ``fail`` (an assertion failure).
      * ``<error>`` child → status ``fail`` (an unexpected exception);
        subunit doesn't distinguish these and the JUnit author intent is
        identical for our purposes (both mean "did not pass").
      * ``<skipped>`` child → status ``skip``.
      * Otherwise → status ``success``.

    The body text and ``message``/``type`` attributes of the failure
    element are folded into a single ``text/plain`` attachment on the
    terminal packet.

    Testcases are collected from the whole document below a
    ``<testsuite>`` or ``<testsuites>`` root, so suites nested inside
    other suites are converted too. Documents with any other root
    element are ignored.

    Per-class ``<system-out>`` / ``<system-err>`` blocks aren't
    attributed to individual testcases by the JUnit schema (they cover
    the whole suite). They're dropped.

    :param xml_files: Iterable of file paths containing JUnit XML.
    :param output_stream: A binary stream to write subunit v2 bytes to.
    :return: 0 if no testcase failed or errored, 1 otherwise. Files that
        fail to parse are reported on stderr and counted as a failure so
        the broken XML doesn't get silently swallowed.
    """
    import datetime
    import xml.etree.ElementTree as ET

    output = StreamResultToBytes(output_stream)
    utf8_text = "text/plain; charset=UTF8"
    any_failed = False
    # Synthetic timestamps. We don't know when the JUnit run actually
    # happened, but spacing the inprogress/terminal packets by each
    # testcase's recorded `time` attribute lets consumers compute the
    # right duration without making up wall-clock data.
    clock = datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc)

    for path in xml_files:
        try:
            # NOTE(security): xml.etree.ElementTree is not hardened against
            # maliciously constructed documents (e.g. entity expansion).
            # Reports are expected to come from a local test run; do not
            # feed this untrusted XML.
            root = ET.parse(path).getroot()
        except (OSError, ET.ParseError) as exc:
            sys.stderr.write("JUnitXML2SubUnit: failed to parse {}: {}\n".format(path, exc))
            any_failed = True
            continue

        for case in _iter_junit_testcases(root):
            name = case.get("name") or ""
            if not name:
                # Without a name there's no usable test_id; skip rather
                # than emit a malformed ID.
                continue
            classname = case.get("classname") or ""
            test_id = "{}::{}".format(classname, name) if classname else name

            failure = case.find("failure")
            error = case.find("error")
            skipped = case.find("skipped")

            if failure is not None or error is not None:
                status = "fail"
                # A <failure> wins over an <error> when both are present.
                file_bytes = _format_junit_detail(failure if failure is not None else error)
                any_failed = True
            elif skipped is not None:
                status = "skip"
                file_bytes = _format_junit_detail(skipped)
            else:
                status = "success"
                file_bytes = None

            start_ts = clock
            try:
                end_ts = start_ts + datetime.timedelta(seconds=_junit_duration(case.get("time")))
            except OverflowError:
                # An absurdly large duration would push the synthetic
                # clock past what datetime can represent; record a
                # zero-length test instead of aborting the conversion.
                end_ts = start_ts
            clock = end_ts

            output.status(
                test_id=test_id,
                test_status="inprogress",
                timestamp=start_ts,
            )
            output.status(
                test_id=test_id,
                test_status=status,
                eof=True,
                file_name="junit detail" if file_bytes else None,
                file_bytes=file_bytes,
                mime_type=utf8_text if file_bytes else None,
                timestamp=end_ts,
            )

    return 1 if any_failed else 0


def _junit_duration(value):
    """Parse a testcase ``time`` attribute into non-negative seconds.

    Missing, unparseable, NaN, infinite and negative values all yield
    ``0.0`` so the synthetic clock never fails or runs backwards.
    """
    try:
        seconds = float(value)
    except (TypeError, ValueError):
        return 0.0
    # NaN fails every comparison, so this single range check rejects NaN,
    # +/-inf and negative values alike.
    if not (0.0 <= seconds < float("inf")):
        return 0.0
    return seconds


def _iter_junit_testcases(root):
    """Return an iterator over every ``<testcase>`` below a JUnit root.

    JUnit XML files come in two shapes: a single ``<testsuite>`` at the
    root, or a ``<testsuites>`` wrapper containing many. Suites may also
    be nested, so the whole subtree is searched in document order.
    Anything else is a non-JUnit document and yields nothing.
    """
    if root.tag in ("testsuite", "testsuites"):
        return root.iter("testcase")
    return iter(())


def _format_junit_detail(element):
    """Serialise a ``<failure>``/``<error>``/``<skipped>`` body to bytes.

    JUnit elements carry the message and exception type as attributes and
    the stack trace as element text. Combine both into a single
    text/plain blob so the consumer sees everything on one packet. Returns
    ``None`` when there's nothing to attach (an empty ``<skipped/>``).
    """
    # "type: message" when both are set, otherwise whichever one exists.
    headline = ": ".join(part for part in (element.get("type"), element.get("message")) if part)
    body = (element.text or "").strip()
    lines = [text for text in (headline, body) if text]
    if not lines:
        return None
    return ("\n".join(lines) + "\n").encode("utf-8")
#!/usr/bin/env python3
# File: python/subunit/filter_scripts/junitxml2subunit.py
#
#  subunit: extensions to python unittest to get test results from subprocesses.
#  Copyright (C) 2026  Jelmer Vernooij
#
#  Licensed under either the Apache License, Version 2.0 or the BSD 3-clause
#  license at the users choice. A copy of both licenses are available in the
#  project source as Apache-2.0 and BSD. You may not use this file except in
#  compliance with one of these two licences.
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under these licenses is distributed on an "AS IS" BASIS, WITHOUT
#  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
#  license you chose for the specific language governing permissions and
#  limitations under that license.
#

"""A filter that reads JUnit XML test reports and emits a subunit v2 stream.

JUnit XML is the de-facto interchange format for JVM test runners (Maven
Surefire, Gradle, Ant) and many other ecosystems. Maven and Gradle write
one XML file per test class into a reports directory, so this script
accepts directories as well as individual files.

Typical use with Maven::

    mvn clean test ; junitxml2subunit -d target/surefire-reports

Typical use with Gradle::

    gradle clean test ; junitxml2subunit -d build/test-results/test
"""

import argparse
import os
import sys


def parse_args(argv):
    """Parse the command line.

    :param argv: List of argument strings (without the program name).
    :return: An ``argparse.Namespace`` with ``dirs`` (list of ``-d``
        values, in the order given) and ``files`` (positional paths).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Convert JUnit XML test reports to a subunit v2 stream on stdout. "
            "Pass individual files as positional arguments or use -d/--dir to "
            "walk a reports directory for *.xml files."
        ),
    )
    parser.add_argument(
        "-d",
        "--dir",
        dest="dirs",
        action="append",
        default=[],
        metavar="DIR",
        help=(
            "Directory to walk for *.xml report files. May be repeated. "
            "Files inside the directory are converted in lexical order so "
            "the output is deterministic across runs."
        ),
    )
    parser.add_argument(
        "files",
        nargs="*",
        help="Individual JUnit XML report files to convert.",
    )
    return parser.parse_args(argv)


def collect_files(dirs, files):
    """Combine `--dir DIR` walks with explicit FILE arguments.

    Within each directory we sort by filename so the resulting subunit
    stream is reproducible. Across directories we preserve the user's
    argv order (some workflows feed multiple module-specific report
    directories and care about the suite ordering). The ``.xml`` suffix
    is matched case-insensitively so ``REPORT.XML`` is not silently
    skipped.

    :param dirs: Directories to walk recursively. Entries that are not
        directories are reported on stderr and skipped.
    :param files: Explicit file paths, appended unchanged after every
        directory walk.
    :return: List of paths in conversion order.
    """
    found = []
    for directory in dirs:
        if not os.path.isdir(directory):
            sys.stderr.write("junitxml2subunit: not a directory: {}\n".format(directory))
            continue
        # Sorting the (root, dirs, names) tuples orders the walk by root
        # path; roots are unique, so the comparison never falls through
        # to the lists.
        for root, _subdirs, names in sorted(os.walk(directory)):
            found.extend(
                os.path.join(root, name)
                for name in sorted(names)
                if name.lower().endswith(".xml")
            )
    found.extend(files)
    return found


def main(argv=None):
    """Entry point for the ``junitxml2subunit`` script.

    :param argv: Argument list; defaults to ``sys.argv[1:]``.
    :return: Process exit status: 2 when no input files were found,
        otherwise whatever ``JUnitXML2SubUnit`` returns.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)
    inputs = collect_files(args.dirs, args.files)
    if not inputs:
        sys.stderr.write("junitxml2subunit: no input files found (pass FILE arguments or use -d DIR)\n")
        return 2

    # Imported here rather than at module level so argument handling and
    # file collection stay usable on their own.
    from subunit import JUnitXML2SubUnit

    return JUnitXML2SubUnit(inputs, sys.stdout.buffer)


if __name__ == "__main__":
    sys.exit(main())
# File: python/tests/test_junitxml2subunit.py
#
#  subunit: extensions to python unittest to get test results from subprocesses.
#  Copyright (C) 2026  Jelmer Vernooij
#
#  Licensed under either the Apache License, Version 2.0 or the BSD 3-clause
#  license at the users choice. A copy of both licenses are available in the
#  project source as Apache-2.0 and BSD. You may not use this file except in
#  compliance with one of these two licences.
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under these licenses is distributed on an "AS IS" BASIS, WITHOUT
#  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
#  license you chose for the specific language governing permissions and
#  limitations under that license.
#

"""Tests for JUnitXML2SubUnit."""

import io
import os
import shutil
import tempfile
from unittest import mock

from testtools import TestCase
from testtools.testresult.doubles import StreamResult

import subunit
from subunit.filter_scripts import junitxml2subunit


def _write(tmpdir, name, content):
    """Write ``content`` to ``tmpdir/name`` as UTF-8 and return the path."""
    path = os.path.join(tmpdir, name)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(content)
    return path


class TestJUnitXML2SubUnit(TestCase):
    """Behavioural tests for `JUnitXML2SubUnit`.

    Each test writes a synthetic JUnit XML doc to disk, runs the converter,
    decodes the resulting subunit bytes back into events via
    `StreamResult`, and asserts on the (test_id, test_status) tuples.
    """

    def setUp(self):
        super().setUp()
        self.tmp = tempfile.mkdtemp(prefix="junitxml2subunit-test-")
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)
        self.subunit = io.BytesIO()

    def _events(self):
        """Decode everything written to ``self.subunit`` into event tuples."""
        self.subunit.seek(0)
        sink = StreamResult()
        subunit.ByteStreamToStreamResult(self.subunit).run(sink)
        return sink._events

    def _statuses(self, events):
        """Reduce status events to ``(test_id, test_status)`` pairs."""
        return [(e[1], e[2]) for e in events if e[0] == "status"]

    def test_passing_testcase(self):
        path = _write(
            self.tmp,
            "TEST-FooTest.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="com.example.FooTest" tests="1">
  <testcase classname="com.example.FooTest" name="testBar" time="0.012"/>
</testsuite>
""",
        )
        rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(0, rc)
        self.assertEqual(
            [
                ("com.example.FooTest::testBar", "inprogress"),
                ("com.example.FooTest::testBar", "success"),
            ],
            self._statuses(self._events()),
        )

    def test_failure_marks_fail_and_returns_nonzero(self):
        path = _write(
            self.tmp,
            "TEST-FooTest.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="com.example.FooTest" tests="1" failures="1">
  <testcase classname="com.example.FooTest" name="testBar" time="0.010">
    <failure message="expected x but was y" type="java.lang.AssertionError">
      at line 42
    </failure>
  </testcase>
</testsuite>
""",
        )
        rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(1, rc)
        events = self._events()
        terminal = [
            e
            for e in events
            if e[0] == "status" and e[1] == "com.example.FooTest::testBar" and e[2] == "fail"
        ]
        self.assertEqual(1, len(terminal))
        # The failure detail (type + message + body) is folded into a
        # single attachment so consumers see everything in one place.
        ev = terminal[0]
        self.assertEqual("junit detail", ev[5])
        # The decoder returns the attachment as a memoryview; coerce to
        # bytes for substring searching.
        body = bytes(ev[6])
        self.assertIn(b"java.lang.AssertionError", body)
        self.assertIn(b"expected x but was y", body)
        self.assertIn(b"at line 42", body)

    def test_error_is_treated_as_fail(self):
        # An <error> in JUnit terms is "an unexpected exception". subunit
        # has no separate "error" status that maps cleanly, and from a
        # consumer's perspective both mean "did not pass" — so it lands
        # on `fail` like a failure does.
        path = _write(
            self.tmp,
            "TEST-FooTest.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="com.example.FooTest" tests="1" errors="1">
  <testcase classname="com.example.FooTest" name="testBar" time="0.010">
    <error message="boom" type="java.lang.RuntimeException">
      stack trace
    </error>
  </testcase>
</testsuite>
""",
        )
        rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(1, rc)
        statuses = self._statuses(self._events())
        self.assertIn(("com.example.FooTest::testBar", "fail"), statuses)

    def test_skipped(self):
        path = _write(
            self.tmp,
            "TEST-FooTest.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="com.example.FooTest" tests="1" skipped="1">
  <testcase classname="com.example.FooTest" name="testBar" time="0">
    <skipped message="not supported here"/>
  </testcase>
</testsuite>
""",
        )
        rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(0, rc)
        statuses = self._statuses(self._events())
        self.assertIn(("com.example.FooTest::testBar", "skip"), statuses)

    def test_testsuites_wrapper_is_unwrapped(self):
        # Some emitters (Gradle, Ant) wrap multiple <testsuite> in a
        # <testsuites> root; both shapes need to work.
        path = _write(
            self.tmp,
            "report.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
  <testsuite name="A" tests="1">
    <testcase classname="A" name="testOne" time="0.1"/>
  </testsuite>
  <testsuite name="B" tests="1">
    <testcase classname="B" name="testTwo" time="0.1"/>
  </testsuite>
</testsuites>
""",
        )
        rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(0, rc)
        statuses = self._statuses(self._events())
        self.assertIn(("A::testOne", "success"), statuses)
        self.assertIn(("B::testTwo", "success"), statuses)

    def test_multiple_files_concatenate(self):
        a = _write(
            self.tmp,
            "TEST-A.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="A" tests="1">
  <testcase classname="A" name="testOne" time="0.1"/>
</testsuite>
""",
        )
        b = _write(
            self.tmp,
            "TEST-B.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="B" tests="1">
  <testcase classname="B" name="testTwo" time="0.1"/>
</testsuite>
""",
        )
        rc = subunit.JUnitXML2SubUnit([a, b], self.subunit)
        self.assertEqual(0, rc)
        statuses = self._statuses(self._events())
        self.assertIn(("A::testOne", "success"), statuses)
        self.assertIn(("B::testTwo", "success"), statuses)

    def test_missing_classname_falls_back_to_name(self):
        # `classname` is technically optional; without it the test ID
        # is just the bare method name rather than emitting "::name".
        path = _write(
            self.tmp,
            "report.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="anonymous" tests="1">
  <testcase name="bareName" time="0.1"/>
</testsuite>
""",
        )
        rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(0, rc)
        statuses = self._statuses(self._events())
        self.assertIn(("bareName", "success"), statuses)

    def test_unparseable_xml_counts_as_failure(self):
        # A broken file is loud (stderr warning + non-zero exit) rather
        # than silently dropping the suite — broken XML in a CI report
        # almost always means a test runner crash.
        path = _write(self.tmp, "broken.xml", "<>")
        with mock.patch("sys.stderr", new=io.StringIO()) as stderr:
            rc = subunit.JUnitXML2SubUnit([path], self.subunit)
        self.assertEqual(1, rc)
        self.assertIn("failed to parse", stderr.getvalue())

    def test_time_attribute_advances_synthetic_clock(self):
        # Each testcase's `time` attribute should determine the gap
        # between its inprogress and terminal packets, so the consumer
        # can recover the duration. Use distinct values per test to
        # confirm both make it through.
        path = _write(
            self.tmp,
            "report.xml",
            """<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="A" tests="2">
  <testcase classname="A" name="testOne" time="0.5"/>
  <testcase classname="A" name="testTwo" time="1.5"/>
</testsuite>
""",
        )
        subunit.JUnitXML2SubUnit([path], self.subunit)
        events = self._events()

        # Walk the status events and group their timestamps (the last
        # field of each event tuple) by test_id.
        timestamps = {}
        for e in events:
            if e[0] != "status":
                continue
            test_id = e[1]
            ts = e[-1]
            if ts is None:
                continue
            timestamps.setdefault(test_id, []).append(ts)

        one = timestamps["A::testOne"]
        two = timestamps["A::testTwo"]
        self.assertEqual(2, len(one))
        self.assertEqual(2, len(two))
        # testOne spans 0.5s
        self.assertAlmostEqual(0.5, (one[1] - one[0]).total_seconds(), places=3)
        # testTwo spans 1.5s
        self.assertAlmostEqual(1.5, (two[1] - two[0]).total_seconds(), places=3)


class TestCollectFiles(TestCase):
    """Tests for the `--dir` walking logic in the script entrypoint."""

    def setUp(self):
        super().setUp()
        self.tmp = tempfile.mkdtemp(prefix="junitxml2subunit-collect-")
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)

    def test_dir_walks_xml_files_only(self):
        a = _write(self.tmp, "TEST-A.xml", "")
        _write(self.tmp, "ignored.txt", "")
        b = _write(self.tmp, "TEST-B.xml", "")
        out = junitxml2subunit.collect_files([self.tmp], [])
        self.assertEqual({a, b}, set(out))

    def test_dir_results_are_sorted_for_deterministic_output(self):
        # Lexical sort within a directory so the subunit stream is
        # reproducible across runs (and across filesystems with
        # different readdir order).
        b = _write(self.tmp, "TEST-B.xml", "")
        a = _write(self.tmp, "TEST-A.xml", "")
        out = junitxml2subunit.collect_files([self.tmp], [])
        self.assertEqual([a, b], out)

    def test_explicit_files_appended_after_dir_walks(self):
        a = _write(self.tmp, "TEST-A.xml", "")
        # Build an explicit file outside the walked dir to confirm it's
        # appended after the discovered files rather than re-walked.
        extra_dir = tempfile.mkdtemp(prefix="junitxml2subunit-extra-")
        self.addCleanup(shutil.rmtree, extra_dir, ignore_errors=True)
        explicit = _write(extra_dir, "extra.xml", "")
        out = junitxml2subunit.collect_files([self.tmp], [explicit])
        self.assertEqual([a, explicit], out)

    def test_missing_dir_warned_and_skipped(self):
        with mock.patch("sys.stderr", new=io.StringIO()) as stderr:
            out = junitxml2subunit.collect_files(["/nonexistent/junit/dir"], [])
        self.assertEqual([], out)
        self.assertIn("not a directory", stderr.getvalue())