Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions hwp2hwpx/section_converter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Convert HWP BodyText sections to HWPX section XML files."""

import struct
import math
import re
from .xml_builder import root_element, sub, make_tag
from . import value_maps as vm

Expand Down Expand Up @@ -53,6 +55,24 @@ def _compute_final_dimensions(sc_content):
return int(round(w)), int(round(h))


def _weighted_text_len(text):
"""Approximate rendered text length for table wrapping.

HWP cell heights in older binary files can be lower than the height needed
by HWPX viewers after line wrapping. Count CJK characters as full width and
ASCII as narrower so we only expand rows that are likely to clip.
"""
total = 0.0
for ch in text:
if ch.isspace():
total += 0.25
elif ord(ch) < 128:
total += 0.55
else:
total += 1.0
return total


def _transform_point(x, y, sc_content):
"""Transform a point through the scalerotation chain (scaling only, no translation).

Expand Down Expand Up @@ -1107,6 +1127,63 @@ def _build_table_cells(self, tbl, start_pos, end_pos, ctrl_level):

self._build_table_cell(tr, cell_content, cell_start, cell_end)

self._adjust_table_row_heights(tbl)

def _adjust_table_row_heights(self, tbl):
    """Expand table row heights when wrapped cell text would be clipped.

    For each ``hp:tr`` directly under *tbl*, every ``hp:tc`` that has an
    ``hp:cellSz`` with integer ``width``/``height`` and some non-blank
    ``hp:t`` text is examined. The text is measured with
    ``_weighted_text_len``, wrapped into the cell's inner width assuming a
    fixed body font, and turned into a required height. If any such cell is
    shorter than it needs to be, every examined cell in that row whose
    height is below the largest requirement is raised to it.

    Cells without ``hp:cellSz``, with non-integer size attributes, or with
    no text are left untouched. Non-integer ``hp:cellMargin`` attributes
    fall back to the default margin instead of aborting the conversion.

    Args:
        tbl: The table element whose rows are adjusted in place.

    Returns:
        None. The ``height`` attribute of ``hp:cellSz`` elements is
        rewritten in place.
    """
    tr_tag = make_tag("hp", "tr")
    tc_tag = make_tag("hp", "tc")
    cell_sz_tag = make_tag("hp", "cellSz")
    text_tag = make_tag("hp", "t")
    margin_tag = make_tag("hp", "cellMargin")

    # Margin used when cellMargin, or one of its attributes, is absent.
    default_margin = 141

    # HWP char shape heights are hundredths of a point; 1000 is a common
    # 10pt body font. Section conversion does not have direct access to
    # header charPr records here, so use the body default.
    font_height = 1000
    # Estimated horizontal advance of one full-width character.
    char_advance = font_height * 0.92
    # Fixed slack added on top of the wrapped-line estimate.
    extra_padding = 180

    def margin_value(margin, side):
        """Return one cellMargin side as int, defaulting on bad input."""
        if margin is None:
            return default_margin
        try:
            return int(margin.get(side, str(default_margin)))
        except ValueError:
            # Same best-effort policy as malformed cellSz values: do not
            # let one bad attribute abort the whole conversion.
            return default_margin

    for tr in tbl.findall(tr_tag):
        required_heights = []  # requirements that exceed the current height
        cells = []  # (cellSz element, current height) for text cells

        for tc in tr.findall(tc_tag):
            csz = tc.find(cell_sz_tag)
            if csz is None:
                continue

            try:
                width = int(csz.get("width", "0"))
                height = int(csz.get("height", "0"))
            except ValueError:
                continue

            # Join all non-blank text runs in the cell and collapse
            # whitespace so the length estimate is not inflated.
            text = " ".join(
                t.text or ""
                for t in tc.iter(text_tag)
                if (t.text or "").strip()
            )
            text = re.sub(r"\s+", " ", text).strip()
            if not text:
                continue

            margin = tc.find(margin_tag)
            left = margin_value(margin, "left")
            right = margin_value(margin, "right")
            top = margin_value(margin, "top")
            bottom = margin_value(margin, "bottom")

            available_width = max(1, width - left - right)
            chars_per_line = max(1.0, available_width / char_advance)
            lines = max(
                1, math.ceil(_weighted_text_len(text) / chars_per_line)
            )
            required = int(
                math.ceil(
                    lines * font_height * 1.32 + top + bottom + extra_padding
                )
            )
            # Never go below the single-line minimum.
            required = max(
                required, int(font_height * 1.55 + top + bottom)
            )

            if height < required:
                required_heights.append(required)
            cells.append((csz, height))

        if not required_heights:
            continue

        # Raise every examined cell in the row to the largest requirement
        # so the row stays uniform; heights are never reduced.
        row_height = max(required_heights)
        for csz, height in cells:
            if height < row_height:
                csz.set("height", str(row_height))

def _build_table_cell(self, tr, cell_content, children_start, children_end):
"""Build a table cell element."""
tc = sub(tr, "hp", "tc")
Expand Down