From f6e92884d966f805788963c83dc23050e6922841 Mon Sep 17 00:00:00 2001 From: phdgil Date: Fri, 29 May 2026 18:50:36 +0900 Subject: [PATCH] Fix clipped table text after HWP conversion --- hwp2hwpx/section_converter.py | 77 +++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/hwp2hwpx/section_converter.py b/hwp2hwpx/section_converter.py index 21f06d6..4793fd9 100644 --- a/hwp2hwpx/section_converter.py +++ b/hwp2hwpx/section_converter.py @@ -1,6 +1,8 @@ """Convert HWP BodyText sections to HWPX section XML files.""" import struct +import math +import re from .xml_builder import root_element, sub, make_tag from . import value_maps as vm @@ -53,6 +55,24 @@ def _compute_final_dimensions(sc_content): return int(round(w)), int(round(h)) +def _weighted_text_len(text): + """Approximate rendered text length for table wrapping. + + HWP cell heights in older binary files can be lower than the height needed + by HWPX viewers after line wrapping. Count CJK characters as full width and + ASCII as narrower so we only expand rows that are likely to clip. + """ + total = 0.0 + for ch in text: + if ch.isspace(): + total += 0.25 + elif ord(ch) < 128: + total += 0.55 + else: + total += 1.0 + return total + + def _transform_point(x, y, sc_content): """Transform a point through the scalerotation chain (scaling only, no translation). 
def _adjust_table_row_heights(self, tbl):
    """Expand table row heights when wrapped cell text would be clipped.

    For every ``hp:tr`` found directly under *tbl*, the text of each
    ``hp:tc`` is measured with ``_weighted_text_len`` and turned into an
    estimated line count for the cell's usable width.  If at least one cell
    in the row is shorter than its estimate, the ``height`` attribute of
    every measured cell in that row is raised to the largest estimate.
    Heights are only ever increased, never reduced.

    Cells are left untouched when they have no ``hp:cellSz`` child, when
    their width/height is not an integer, or when they contain no visible
    text.  A missing or malformed ``hp:cellMargin`` value falls back to the
    default of 141 for that side instead of aborting the conversion.

    Args:
        tbl: Table element whose ``hp:cellSz`` heights are updated in place.
    """
    default_margin = 141
    # HWP char shape heights are hundredths of a point; 1000 is a
    # common 10pt body font. Section conversion does not have direct
    # access to header charPr records here, so use the body default.
    font_height = 1000
    char_width = font_height * 0.92    # estimated advance of one full-width char
    line_height = font_height * 1.32   # estimated height of one wrapped line
    extra_padding = 180                # slack added on top of the margins
    min_text_height = font_height * 1.55

    # The tag names do not change between rows, so resolve them once.
    tr_tag = make_tag("hp", "tr")
    tc_tag = make_tag("hp", "tc")
    cell_sz_tag = make_tag("hp", "cellSz")
    text_tag = make_tag("hp", "t")
    margin_tag = make_tag("hp", "cellMargin")

    def margin_value(margin, side):
        """Return one cell margin as int, or the default if absent/invalid."""
        if margin is None:
            return default_margin
        try:
            return int(margin.get(side, str(default_margin)))
        except ValueError:
            return default_margin

    for tr in tbl.findall(tr_tag):
        required_heights = []
        cells = []
        for tc in tr.findall(tc_tag):
            csz = tc.find(cell_sz_tag)
            if csz is None:
                continue

            try:
                width = int(csz.get("width", "0"))
                height = int(csz.get("height", "0"))
            except ValueError:
                continue

            # Join every non-blank text run in the cell and collapse runs of
            # whitespace so the length estimate is not inflated.
            text = " ".join(
                t.text or "" for t in tc.iter(text_tag)
                if (t.text or "").strip()
            )
            text = re.sub(r"\s+", " ", text).strip()
            if not text:
                continue

            margin = tc.find(margin_tag)
            left = margin_value(margin, "left")
            right = margin_value(margin, "right")
            top = margin_value(margin, "top")
            bottom = margin_value(margin, "bottom")

            # Clamp so a degenerate width can never divide by zero or yield
            # less than one character per line.
            available_width = max(1, width - left - right)
            chars_per_line = max(1.0, available_width / char_width)
            lines = max(1, math.ceil(_weighted_text_len(text) / chars_per_line))
            required = int(math.ceil(lines * line_height + top + bottom + extra_padding))
            required = max(required, int(min_text_height + top + bottom))

            if height < required:
                required_heights.append(required)
            cells.append((csz, height))

        if not required_heights:
            continue

        # Bring every measured cell of the row up to the tallest requirement.
        row_height = max(required_heights)
        for csz, height in cells:
            if height < row_height:
                csz.set("height", str(row_height))