From 40fbfdacc14b7e955172f9d68ae81594a9d240f4 Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Sat, 4 Jul 2026 00:31:34 +0800 Subject: [PATCH 01/15] Fix generic Pine codegen parity gaps Bugs solved: - Preserve tuple-returning TA results through request.security and mutable security history rebinds. - Type untyped string params/locals/timeframe expressions correctly for string concatenation. - Materialize inline expression args passed to UDF series params. - Type array.new(..., na), drawing-array constructors, str/syminfo/text constants, timestamp timezone expressions, and stdev/variance biased args. Validation: - pytest tests/test_codegen_validation_fixes.py tests/test_support_checker.py -q - data/draft compile-error count: 5 -> 0 - data/standard+anomaly grade unchanged --- pineforge_codegen/analyzer/base.py | 44 +++++++++-- pineforge_codegen/analyzer/call_handlers.py | 51 +++++++++++-- pineforge_codegen/analyzer/tables.py | 22 +++++- pineforge_codegen/codegen/base.py | 55 +++++++++---- pineforge_codegen/codegen/emit_top.py | 3 + pineforge_codegen/codegen/security.py | 40 ++++++++++ pineforge_codegen/codegen/types.py | 12 ++- pineforge_codegen/codegen/visit_call.py | 51 +++++++++---- pineforge_codegen/codegen/visit_expr.py | 12 +++ pineforge_codegen/codegen/visit_stmt.py | 5 ++ pineforge_codegen/signatures.py | 4 +- pineforge_codegen/support_checker.py | 33 ++++++++ tests/test_codegen_validation_fixes.py | 85 ++++++++++++++++++++- tests/test_support_checker.py | 19 +++++ 14 files changed, 391 insertions(+), 45 deletions(-) diff --git a/pineforge_codegen/analyzer/base.py b/pineforge_codegen/analyzer/base.py index b7e4d78..0befe92 100644 --- a/pineforge_codegen/analyzer/base.py +++ b/pineforge_codegen/analyzer/base.py @@ -156,6 +156,7 @@ def __init__(self, ast: Program, filename: str = "") -> None: # Probe: data/validation/udt-method-probe-20-udt-return-from-func. 
self._func_udt_return_types: dict[str, str] = {} self._func_return_type_specs: dict[str, "TypeSpec"] = {} + self._func_param_type_specs: dict[str, list] = {} # Per-function var_members and series_vars (for call-site cloning) self._func_var_members: dict[str, list] = {} # func_name -> [(name, PineType, init_str)] self._func_series_vars: dict[str, set] = {} # func_name -> set[str] @@ -1184,6 +1185,16 @@ def _visit_FuncDef(self, node: FuncDef) -> PineType: if hi > lo: self._func_ta_ranges[node.name] = (lo, hi) + inferred_param_specs = self._param_type_specs_from_def(node) + for i, param in enumerate(node.params): + if i < len(inferred_param_specs) and inferred_param_specs[i] is not None: + continue + sym = self._symbols.resolve(param) + spec = getattr(sym, "type_spec", None) if sym is not None else None + if spec is not None and i < len(inferred_param_specs): + inferred_param_specs[i] = spec + self._func_param_type_specs[node.name] = inferred_param_specs + self._symbols.exit_scope() # Detect if function returns a tuple (last stmt is TupleLiteral) @@ -1511,6 +1522,20 @@ def _visit_BinOp(self, node: BinOp) -> PineType: # String concatenation: if either side is STRING, result is STRING if left_type == PineType.STRING or right_type == PineType.STRING: + def _mark_string_param(expr) -> None: + if not isinstance(expr, Identifier): + return + sym = self._symbols.resolve(expr.name) + if sym is None or not (sym.scope or "").startswith("func_"): + return + if sym.pine_type == PineType.UNKNOWN: + sym.pine_type = PineType.STRING + sym.type_spec = TypeSpec.primitive("string") + + if left_type == PineType.STRING: + _mark_string_param(node.right) + if right_type == PineType.STRING: + _mark_string_param(node.left) return PineType.STRING # Arithmetic: promote to FLOAT if either side is FLOAT @@ -1583,11 +1608,14 @@ def _visit_FuncCall(self, node: FuncCall) -> PineType: if isinstance(obj, Identifier) and obj.name == "str": for arg in node.args: self._visit(arg) - # Most str.* return a 
string, but a few don't: - # str.tonumber -> float, str.length -> int + # Most str.* return a string, but predicates and index helpers + # are scalar. Keep this aligned with signatures.py and the C++ + # emitter's _infer_type path. + if member in ("contains", "startswith", "endswith"): + return PineType.BOOL if member == "tonumber": return PineType.FLOAT - if member == "length": + if member in ("length", "pos"): return PineType.INT return PineType.STRING @@ -1812,9 +1840,11 @@ def _visit_MemberAccess(self, node: MemberAccess) -> PineType: # syminfo.* if ns == "syminfo": - if node.member == "mintick": - return PineType.FLOAT - return PineType.STRING + from .. import signatures as _pf_sigs + return _pf_sigs.SYMINFO_VARIABLES.get( + f"syminfo.{node.member}", + PineType.STRING, + ) # color.* constants if ns == "color": @@ -1873,7 +1903,7 @@ def _visit_MemberAccess(self, node: MemberAccess) -> PineType: # text.* constants (align_left, align_right, etc.) if ns == "text": - return PineType.INT + return PineType.STRING # extend.* constants (left, right, both, none) if ns == "extend": diff --git a/pineforge_codegen/analyzer/call_handlers.py b/pineforge_codegen/analyzer/call_handlers.py index 552bcbc..11ef1c0 100644 --- a/pineforge_codegen/analyzer/call_handlers.py +++ b/pineforge_codegen/analyzer/call_handlers.py @@ -75,7 +75,7 @@ from .contracts import FixnanCallSite, FuncInfo, SecurityCallInfo, TACallSite from .tables import ( BAR_FIELDS, TA_CLASS_MAP, TA_MULTI_CTOR, TA_NO_CTOR, TA_PERIOD_ARG, - TA_TUPLE_RETURNS, + TA_TUPLE_RETURNS, TA_TUPLE_ELEMENT_COUNTS, TA_COMPUTE_ARGS, ) @@ -235,9 +235,14 @@ def _handle_ta_call(self, func_name: str, node: FuncCall) -> PineType: elif func_name in TA_PERIOD_ARG: ctor_indices = {TA_PERIOD_ARG[func_name]} - for i, arg in enumerate(all_args): - if i not in ctor_indices and arg is not None: - compute_args.append(arg) + if func_name in TA_COMPUTE_ARGS: + for i in TA_COMPUTE_ARGS[func_name]: + if i < len(all_args) and all_args[i] is not 
None: + compute_args.append(all_args[i]) + else: + for i, arg in enumerate(all_args): + if i not in ctor_indices and arg is not None: + compute_args.append(arg) is_static = self._global_scope and all(self._is_static_expression(arg) for arg in compute_args) site = TACallSite( @@ -309,6 +314,27 @@ def _handle_request_call(self, func_name: str, node: FuncCall) -> PineType: returns_tuple = isinstance(expr_node, TupleLiteral) tuple_size = len(expr_node.elements) if returns_tuple else 0 + if not returns_tuple and isinstance(expr_node, FuncCall): + expr_func = None + expr_ns = None + if (isinstance(expr_node.callee, MemberAccess) + and isinstance(expr_node.callee.object, Identifier)): + expr_ns = expr_node.callee.object.name + expr_func = expr_node.callee.member + if expr_ns == "ta": + if expr_func == "vwap": + merged_v = list(expr_node.args) + for i, pname in enumerate(["source", "anchor", "stdev_mult"]): + if pname in expr_node.kwargs: + while len(merged_v) <= i: + merged_v.append(None) + if merged_v[i] is None: + merged_v[i] = expr_node.kwargs[pname] + if len(merged_v) >= 3: + expr_func = "vwap_bands" + if expr_func in TA_TUPLE_RETURNS: + returns_tuple = True + tuple_size = TA_TUPLE_ELEMENT_COUNTS.get(expr_func, 0) gaps_node = all_args[3] if len(all_args) > 3 else None lookahead_node = all_args[4] if len(all_args) > 4 else None @@ -899,6 +925,18 @@ def _handle_user_func_call(self, func_name: str, node: FuncCall) -> PineType: arg = node.args[p_idx] if isinstance(arg, Identifier) and arg.name in BAR_FIELDS: self._series_bar_fields.add(arg.name) + elif isinstance(arg, Identifier): + sym = self._symbols.resolve(arg.name) + spec = getattr(sym, "type_spec", None) if sym is not None else None + if spec is not None and spec.kind in ("array", "map", "matrix"): + continue + if sym is not None: + sym.is_series = True + if sym.scope and sym.scope.startswith("func_"): + caller_name = sym.scope[5:] + self._func_series_vars.setdefault(caller_name, set()).add(arg.name) + else: + 
self._series_vars.add(arg.name) # Per-call-site cloning: if this function has TA calls or series vars, # track call sites so codegen can create per-call-site variants. @@ -1041,7 +1079,10 @@ def _rep(m: re.Match) -> str: # Per-param TypeSpec: declared hints are authoritative; for untyped # params, infer from the call-site argument's type_spec (so an untyped # ``s`` used as a string, or a UDT passed by value, emits correctly). - param_specs = self._param_type_specs_from_def(func_def) + param_specs = list( + getattr(self, "_func_param_type_specs", {}).get(func_name) + or self._param_type_specs_from_def(func_def) + ) arg_specs = [self._type_spec_from_expr(arg) for arg in node.args] for i in range(len(param_specs)): if param_specs[i] is None and i < len(arg_specs): diff --git a/pineforge_codegen/analyzer/tables.py b/pineforge_codegen/analyzer/tables.py index 558f836..2d4faf1 100644 --- a/pineforge_codegen/analyzer/tables.py +++ b/pineforge_codegen/analyzer/tables.py @@ -110,7 +110,7 @@ TA_PERIOD_ARG = { "sma": 1, "ema": 1, "rma": 1, "rsi": 1, "atr": 0, "highest": 1, "lowest": 1, "change": 1, - "wma": 1, "hma": 1, "stdev": 1, + "wma": 1, "hma": 1, # Task 6 "sum": 1, # Task 7 Batch 1 @@ -119,7 +119,7 @@ "mom": 1, "roc": 1, "rising": 1, "falling": 1, "cci": 1, # cum has no period arg — handled in TA_NO_CTOR # Task 7 Batch 3 - "variance": 1, "median": 1, "highestbars": 1, "lowestbars": 1, + "median": 1, "highestbars": 1, "lowestbars": 1, # Batch 4 "cmo": 1, "cog": 1, "correlation": 2, "percentile_nearest_rank": 1, "percentile_linear_interpolation": 1, @@ -130,6 +130,14 @@ # Functions that return tuples TA_TUPLE_RETURNS = {"macd", "supertrend", "dmi", "bb", "kc", "vwap_bands"} +TA_TUPLE_ELEMENT_COUNTS = { + "macd": 3, + "supertrend": 2, + "dmi": 3, + "bb": 3, + "kc": 3, + "vwap_bands": 3, +} # Functions with multiple constructor args TA_MULTI_CTOR = { @@ -156,6 +164,16 @@ "bbw": [1, 2], # length, mult "kcw": [1, 2], # length, mult "tr": [0], # handle_na (compile-time bool) 
+ "stdev": [1, 2], # length, biased + "variance": [1, 2], # length, biased +} + +# Compute-arg indices: which positional args are forwarded to ``.compute()``. +# Entries here override the default analyzer behavior of forwarding every +# non-constructor argument. +TA_COMPUTE_ARGS = { + "stdev": [0], + "variance": [0], } # No-state functions (no constructor args, stateless or self-contained) diff --git a/pineforge_codegen/codegen/base.py b/pineforge_codegen/codegen/base.py index 6558728..de41600 100644 --- a/pineforge_codegen/codegen/base.py +++ b/pineforge_codegen/codegen/base.py @@ -53,6 +53,7 @@ SKIP_VAR_TYPES, SYMINFO_MEMBER_MAP, COLOR_CONST_MAP, + ARRAY_NEW_CTORS, ARRAY_METHODS, MAP_METHODS, MATRIX_METHODS, @@ -65,6 +66,15 @@ _merge_kwargs, ) +TA_TUPLE_RESULT_TYPES = { + "macd": "ta::MACDResult", + "supertrend": "ta::SupertrendResult", + "dmi": "ta::DMIResult", + "bb": "ta::BBResult", + "kc": "ta::KCResult", + "vwap_bands": "ta::VWAPBandsResult", +} + # (TA_IMPLICIT_COMPUTE / TA_COMPUTE_ARGS now imported from .tables above.) 
# (TA_IMPLICIT_COMPUTE_FULL / TA_IMPLICIT_APPEND / PINE_TYPE_TO_CPP / @@ -428,6 +438,7 @@ def __init__(self, ctx: AnalyzerContext) -> None: self._current_func_series_params: set[str] = set() # Locals declared in the function currently being emitted (symbol table loses them after analysis) self._current_func_locals: set[str] = set() + self._current_func_local_types: dict[str, str] = {} # for-in loop iterator names (must resolve member access, not enum fallback) self._current_loop_vars: set[str] = set() # Track array variables for codegen @@ -854,31 +865,33 @@ def _register_global_aggregate_member_types(self) -> None: spec = self._matrix_specs.get(recv_name) or TypeSpec.matrix(TypeSpec.primitive("float")) self._matrix_specs[name] = spec self._collection_types[name] = spec - elif ns == "array" and fn in ( - "new", - "new_float", - "new_int", - "new_bool", - "new_string", - "from", - ): + elif ns == "array" and fn in ({"new", "from"} | set(ARRAY_NEW_CTORS)): self._array_vars.add(name) + spec = self._type_spec_from_expr(expr) or self._array_spec_for_name(name) + self._collection_types[name] = spec elif ns == "map" and fn == "new": self._map_vars.add(name) - # Also register var/varip matrix members from AST nodes so that - # the typed-matrix gate checks see the correct element spec. + # Also register var/varip aggregate members from AST nodes so that + # class-member declarations see the precise collection type before + # on_bar emits the initializer. This is required for unannotated + # drawing arrays such as ``var boxes = array.new_box()``. 
var_decl_map: dict[str, FuncCall] = {} for stmt in (self.ctx.ast.body if hasattr(self.ctx, "ast") else []): if isinstance(stmt, VarDecl) and isinstance(stmt.value, FuncCall): var_decl_map[stmt.name] = stmt.value for name, _ptype, _init_str in self.ctx.var_members: - if name in self._matrix_specs: - continue expr = var_decl_map.get(name) if expr is None: continue fn2, ns2 = self._resolve_callee(expr.callee) + if ns2 == "array" and fn2 in ({"new", "from"} | set(ARRAY_NEW_CTORS)): + self._array_vars.add(name) + spec2 = self._type_spec_from_expr(expr) or self._array_spec_for_name(name) + self._collection_types[name] = spec2 + continue + if name in self._matrix_specs: + continue if ns2 == "matrix" and fn2 == "new": targs2 = self._template_args_from_call(expr) if hasattr(expr, "annotations") else [] elem_spec2 = self._type_spec_from_hint_name(targs2[0]) if targs2 else TypeSpec.primitive("float") @@ -1331,6 +1344,11 @@ def generate(self) -> str: hist_fields: set[str] = set() for el in expr_node.elements: hist_fields |= self._collect_security_ohlc_hist_fields(el) + for name in item.get("mutable_globals", []) or []: + info = self._global_mutable_infos.get(name) + if info is not None: + for stmt in getattr(info, "source_stmts", []) or []: + hist_fields |= self._collect_security_ohlc_hist_fields(stmt) self._security_ohlc_hist_fields_by_sec[sec_id] = hist_fields for i, el in enumerate(expr_node.elements): ctype = self._infer_cpp_type_for_security_elem(el) @@ -1338,9 +1356,18 @@ def generate(self) -> str: lines.append(f" {ctype} _req_sec_{sec_id}_{i}{{}};") else: lines.append(f" {ctype} _req_sec_{sec_id}_{i} = na();") + elif returns_tuple and tuple_size and tuple_size > 0: + self._security_ohlc_hist_fields_by_sec[sec_id] = ( + self._collect_security_ohlc_hist_fields_for_call(item) + ) + site = self._get_ta_site(expr_node) + ta_name = self._ta_name_from_site(site) if site is not None else "" + ctype = TA_TUPLE_RESULT_TYPES.get(ta_name, "std::tuple") + default = 
self._security_tuple_result_default(ctype, tuple_size) + lines.append(f" {ctype} _req_sec_{sec_id} = {default};") else: - self._security_ohlc_hist_fields_by_sec[sec_id] = self._collect_security_ohlc_hist_fields( - expr_node + self._security_ohlc_hist_fields_by_sec[sec_id] = ( + self._collect_security_ohlc_hist_fields_for_call(item) ) lines.append(f" double _req_sec_{sec_id} = na();") for field in sorted(self._security_ohlc_hist_fields_by_sec.get(sec_id, ())): diff --git a/pineforge_codegen/codegen/emit_top.py b/pineforge_codegen/codegen/emit_top.py index f3a3e99..2c99aa9 100644 --- a/pineforge_codegen/codegen/emit_top.py +++ b/pineforge_codegen/codegen/emit_top.py @@ -873,6 +873,7 @@ def _emit_func_def(self, fi: FuncInfo, lines: list[str], call_site_idx: int | No self._current_instance_name = None prev_func_locals = self._current_func_locals + prev_func_local_types = self._current_func_local_types prev_func_body = getattr(self, "_current_func_body", None) prev_func_name = getattr(self, "_active_func_name", None) # The function body is the lexical scope used by the UDT-alias analysis @@ -886,6 +887,7 @@ def _emit_func_def(self, fi: FuncInfo, lines: list[str], call_site_idx: int | No prev_ptr_alias = self._udt_ptr_alias_locals self._udt_ptr_alias_locals = set() self._current_func_locals = {n for n, _, _ in self.ctx.func_var_members.get(fi.name, [])} + self._current_func_local_types = {} # Plain (non-persistent) scalar locals are emitted inline and live in # no other set; collect them so the unknown-identifier guard in # _visit_ident does not mistake them for undeclared symbols. 
@@ -959,6 +961,7 @@ def _emit_func_def(self, fi: FuncInfo, lines: list[str], call_site_idx: int | No self._current_func_series_params = set() self._udt_param_udt = {} self._current_func_locals = prev_func_locals + self._current_func_local_types = prev_func_local_types self._current_func_body = prev_func_body self._active_func_name = prev_func_name self._udt_ptr_alias_locals = prev_ptr_alias diff --git a/pineforge_codegen/codegen/security.py b/pineforge_codegen/codegen/security.py index defbe1e..0b39733 100644 --- a/pineforge_codegen/codegen/security.py +++ b/pineforge_codegen/codegen/security.py @@ -324,9 +324,26 @@ def walk(n): walk(node) return out + def _collect_security_ohlc_hist_fields_for_call(self, item: dict) -> set[str]: + """Collect HTF OHLC history needed by a security expression and any + mutable-global rebinds replayed inside that security evaluator.""" + fields = self._collect_security_ohlc_hist_fields(item.get("expr_node")) + for name in item.get("mutable_globals", []) or []: + info = self._global_mutable_infos.get(name) + if info is None: + continue + for stmt in getattr(info, "source_stmts", []) or []: + fields |= self._collect_security_ohlc_hist_fields(stmt) + return fields + def _security_ohlc_hist_series_cpp(self, sec_id: int, field: str) -> str: return f"_sec{sec_id}_hist_{field}" + @staticmethod + def _security_tuple_result_default(cpp_type: str, tuple_size: int) -> str: + vals = ", ".join("na()" for _ in range(max(0, tuple_size))) + return f"{cpp_type}{{{vals}}}" + def _collect_security_ta_hist_indices(self, node) -> set[int]: """Which security TA call-site indices need HTF history (subscript index >= 1). 
@@ -1560,6 +1577,29 @@ def emit_security_ta(indices: list[int]) -> None: for name in self._security_ta_hist_series_names(sec_id): lines.append(f" {name}.clear();") lines.append(" break;") + elif returns_tuple and tuple_size and tuple_size > 0: + site = self._get_ta_site(expr_node) + ta_name = self._ta_name_from_site(site) if site is not None else "" + ctype = { + "macd": "ta::MACDResult", + "supertrend": "ta::SupertrendResult", + "dmi": "ta::DMIResult", + "bb": "ta::BBResult", + "kc": "ta::KCResult", + "vwap_bands": "ta::VWAPBandsResult", + }.get(ta_name, "std::tuple") + lines.append(f" case {sec_id}:") + lines.append( + f" _req_sec_{sec_id} = " + f"{self._security_tuple_result_default(ctype, tuple_size)};" + ) + for field in sorted(self._security_ohlc_hist_fields_by_sec.get(sec_id, ())): + lines.append( + f" {self._security_ohlc_hist_series_cpp(sec_id, field)}.clear();" + ) + for name in self._security_ta_hist_series_names(sec_id): + lines.append(f" {name}.clear();") + lines.append(" break;") else: hist = self._security_ohlc_hist_fields_by_sec.get(sec_id, ()) ta_hist_names = self._security_ta_hist_series_names(sec_id) diff --git a/pineforge_codegen/codegen/types.py b/pineforge_codegen/codegen/types.py index adf79f0..abb166b 100644 --- a/pineforge_codegen/codegen/types.py +++ b/pineforge_codegen/codegen/types.py @@ -791,6 +791,8 @@ def _infer_type(self, node) -> str: return "double" if node.name in self._current_func_param_types: return self._current_func_param_types[node.name] + if node.name in getattr(self, "_current_func_local_types", {}): + return self._current_func_local_types[node.name] sym = self.ctx.symbols.resolve(node.name) if sym is not None and getattr(sym, "type_spec", None) is not None: return self._type_spec_to_cpp(sym.type_spec) @@ -829,9 +831,11 @@ def _infer_type(self, node) -> str: if namespace == "str": if func_name == "split": return "std::vector" + if func_name in ("contains", "startswith", "endswith"): + return "bool" if func_name == 
"tonumber": return "double" - if func_name == "length": + if func_name in ("length", "pos"): return "int" return "std::string" if namespace == "ta" and func_name == "pivot_point_levels": @@ -881,6 +885,12 @@ def _infer_type(self, node) -> str: # pine_str_tostring); bare reads must declare std::string. if ename == "format": return "std::string" + if ename == "timeframe": + if node.member in ("period", "main_period"): + return "std::string" + if node.member == "multiplier": + return "int" + return "bool" # syminfo.* type inference: look up in SYMINFO_MEMBER_MAP # and derive C++ type from the expression (na() or function call). if ename == "syminfo": diff --git a/pineforge_codegen/codegen/visit_call.py b/pineforge_codegen/codegen/visit_call.py index c84cf35..1ce77e1 100644 --- a/pineforge_codegen/codegen/visit_call.py +++ b/pineforge_codegen/codegen/visit_call.py @@ -136,6 +136,7 @@ FuncCall, Identifier, MemberAccess, + NaLiteral, TupleLiteral, StringLiteral, ) @@ -459,10 +460,18 @@ def _visit_func_call(self, node: FuncCall) -> str: if func_name in ("new", "new_float", "new_int", "new_bool", "new_string") or func_name in ARRAY_DRAWING_NEW_CTORS: spec = self._type_spec_from_expr(node) or TypeSpec.array(TypeSpec.primitive("float")) cpp_type = self._type_spec_to_cpp(spec) - init_default = self._default_for_spec(spec.element if spec.element is not None else TypeSpec.primitive("float")) + elem_spec = spec.element if spec.element is not None else TypeSpec.primitive("float") + init_default = self._default_for_spec(elem_spec) if node.args: size_arg = self._visit_expr(node.args[0]) - init_val = self._visit_expr(node.args[1]) if len(node.args) > 1 else init_default + if len(node.args) > 1: + init_val = ( + init_default + if isinstance(node.args[1], NaLiteral) + else self._visit_expr(node.args[1]) + ) + else: + init_val = init_default return f"{cpp_type}((size_t)({size_arg}), {init_val})" return f"{cpp_type}()" if func_name == "from": @@ -699,6 +708,8 @@ def 
_visit_func_call(self, node: FuncCall) -> str: is_tz_first = True elif isinstance(node.args[0], StringLiteral): is_tz_first = True + elif self._infer_type(node.args[0]) == "std::string": + is_tz_first = True if is_tz_first: # A single string argument is the timestamp(dateString) @@ -1060,17 +1071,31 @@ def _visit_func_call(self, node: FuncCall) -> str: def _visit_arg_for_series(arg_node, arg_idx): """Visit a function argument, returning Series ref for series params.""" - if arg_idx in _func_series_param_indices and isinstance(arg_node, Identifier): - aname = arg_node.name - # Bar field: pass _s_close instead of current_bar_.close - if aname in BAR_FIELDS or aname in BAR_SERIES_PUSH: - return f"_s_{aname}" - # Series var: pass the Series object directly - if aname in self.ctx.series_vars: - safe = self._safe_name(aname) - if self._active_var_remap and safe in self._active_var_remap: - safe = self._active_var_remap[safe] - return safe + if arg_idx in _func_series_param_indices: + if isinstance(arg_node, Identifier): + aname = arg_node.name + # Bar field: pass _s_close instead of current_bar_.close + if aname in BAR_FIELDS or aname in BAR_SERIES_PUSH: + return f"_s_{aname}" + # Series var: pass the Series object directly + if aname in self.ctx.series_vars: + safe = self._safe_name(aname) + if self._active_var_remap and safe in self._active_var_remap: + safe = self._active_var_remap[safe] + return safe + expr_cpp = self._visit_expr(arg_node) + cpp_t = self._infer_type(arg_node) + if cpp_t not in ("double", "int", "bool"): + cpp_t = "double" + return ( + f"([&]() -> const Series<{cpp_t}>& {{ " + f"static thread_local Series<{cpp_t}> _series_arg; " + f"if (is_first_tick_ && bar_index_ == 0) _series_arg.clear(); " + f"{cpp_t} _sv = ({expr_cpp}); " + f"if (is_first_tick_) _series_arg.push(_sv); " + f"else _series_arg.update(_sv); " + f"return _series_arg; }}())" + ) return self._visit_expr(arg_node) if node.kwargs: diff --git a/pineforge_codegen/codegen/visit_expr.py 
b/pineforge_codegen/codegen/visit_expr.py index 5fb35d2..4f6bce6 100644 --- a/pineforge_codegen/codegen/visit_expr.py +++ b/pineforge_codegen/codegen/visit_expr.py @@ -729,6 +729,18 @@ def _visit_binop(self, node: BinOp) -> str: right = self._visit_expr(node.right) cpp_ops = {"and": "&&", "or": "||"} op = cpp_ops.get(node.op, node.op) + if node.op == "+": + lt = self._infer_type(node.left) + rt = self._infer_type(node.right) + if lt == "std::string" or rt == "std::string": + def _as_string(rendered, inferred): + if inferred == "std::string": + return rendered + if inferred == "bool": + return f'(({rendered}) ? std::string("true") : std::string("false"))' + return f"std::to_string({rendered})" + + return f"({_as_string(left, lt)} + {_as_string(right, rt)})" # PineScript % works on floats — use std::fmod in C++ if node.op == "%": return f"std::fmod((double)({left}), (double)({right}))" diff --git a/pineforge_codegen/codegen/visit_stmt.py b/pineforge_codegen/codegen/visit_stmt.py index 041e19f..43ca387 100644 --- a/pineforge_codegen/codegen/visit_stmt.py +++ b/pineforge_codegen/codegen/visit_stmt.py @@ -258,6 +258,10 @@ def _visit_var_decl(self, node: VarDecl, lines: list[str], pad: str) -> None: # Global-scope non-var vars are class members — emit assignment, not declaration is_global_member = node.name in self._global_member_vars + def remember_local_type(cpp_type: str | None) -> None: + if cpp_type and not is_global_member and node.name in self._current_func_locals: + self._current_func_local_types[node.name] = cpp_type + # Check if it is a static (non-series) global member variable already evaluated inside _inputs_initialized_ block is_static_global_input = False if is_global_member and isinstance(node.value, FuncCall) and self._is_input_call(node.value): @@ -457,6 +461,7 @@ def _visit_var_decl(self, node: VarDecl, lines: list[str], pad: str) -> None: if is_global_member: lines.append(f"{pad}{safe} = {cpp_val};") else: + remember_local_type(cpp_type) 
lines.append(f"{pad}{cpp_type} {safe} = {cpp_val};") @staticmethod diff --git a/pineforge_codegen/signatures.py b/pineforge_codegen/signatures.py index 6ad6e8e..2da4d9f 100644 --- a/pineforge_codegen/signatures.py +++ b/pineforge_codegen/signatures.py @@ -133,8 +133,8 @@ def _ta(short_name: str, *sigs: FuncSig) -> None: # --- Volatility & Range --- _ta("atr", _sig([("length", I)])) _ta("tr", _sig([("handle_na", B, False)])) -_ta("stdev", _sig([("source", F), ("length", I)])) -_ta("variance",_sig([("source", F), ("length", I)])) +_ta("stdev", _sig([("source", F), ("length", I), ("biased", B, True)])) +_ta("variance",_sig([("source", F), ("length", I), ("biased", B, True)])) # --- Trend --- _ta("supertrend", _sig([("factor", F), ("atrPeriod", I)], diff --git a/pineforge_codegen/support_checker.py b/pineforge_codegen/support_checker.py index 15315b0..f9cffb1 100644 --- a/pineforge_codegen/support_checker.py +++ b/pineforge_codegen/support_checker.py @@ -462,6 +462,12 @@ def __init__(self, ast: Program, filename: str = "") -> None: # (``panel.cell(...)``) is a visual sink whose args may carry visual # constants, so it routes through ``_visit_children_const_ok``. self._visual_container_vars: set[str] = set() + # User helpers whose body is only a visual sink (for example + # ``cell(table t, ..., align) => t.cell(..., text_halign=align)``). + # A call to such a helper is itself a visual context, so style + # constants passed through its arguments are safe and should not trip + # the free-expression constant-namespace rejection. + self._visual_sink_funcs: set[str] = set() self._drawing_tuple_vars: set[str] = set() self._func_tuple_drawing_returns: dict[str, list[bool]] = {} # Track whether we are inside an if/ternary condition expression. 
@@ -520,6 +526,8 @@ def _collect_user_definitions(self, ast: Program) -> None: if tuple_returns: self._func_tuple_drawing_returns[stmt.name] = tuple_returns self._collect_visual_container_params(stmt) + if self._func_body_is_visual_sink(stmt): + self._visual_sink_funcs.add(stmt.name) elif isinstance(stmt, MethodDef): self._user_methods.add(stmt.name) self._collect_visual_container_params(stmt) @@ -535,6 +543,27 @@ def _collect_visual_container_params(self, fn) -> None: if hint and str(hint).replace(" ", "") in _VISUAL_CONTAINER_TYPES: self._visual_container_vars.add(pname) + def _expr_is_visual_sink_call(self, expr: ASTNode | None) -> bool: + if not isinstance(expr, FuncCall): + return False + ns, name = _qualified_name(expr.callee) + if ns is None and name in SKIP_FUNC_NAMES: + return True + if ns is not None and ns in SKIP_NAMESPACES: + return True + if ns in _DRAWING_NOOP_BY_NS and name in _DRAWING_NOOP_BY_NS[ns]: + return True + return ns is not None and ns in self._visual_container_vars + + def _func_body_is_visual_sink(self, fn: FuncDef | MethodDef) -> bool: + if not fn.body: + return False + for stmt in fn.body: + expr = stmt.expr if isinstance(stmt, ExprStmt) else None + if not self._expr_is_visual_sink_call(expr): + return False + return True + @staticmethod def _type_name_contains_drawing(type_name: str | None) -> bool: if not type_name: @@ -1207,6 +1236,10 @@ def _visit_FuncCall(self, node: FuncCall) -> None: self._visit_children_const_ok(node) return + if ns is None and name in self._visual_sink_funcs: + self._visit_children_const_ok(node) + return + self._visit_children(node) def _visit_Identifier(self, node: Identifier) -> None: diff --git a/tests/test_codegen_validation_fixes.py b/tests/test_codegen_validation_fixes.py index 48b8735..9f18f15 100644 --- a/tests/test_codegen_validation_fixes.py +++ b/tests/test_codegen_validation_fixes.py @@ -302,6 +302,90 @@ def test_drawing_array_constructor_default_value_arg(): assert "std::vector((size_t)(2)" in 
cpp +def test_untyped_var_drawing_array_constructor_emits_typed_member(): + cpp = _cpp( + "var boxes = array.new_box()\n" + "if bar_index == 0\n" + " b = box.new(bar_index, high, bar_index + 1, low)\n" + " array.push(boxes, b)\n" + "plot(array.size(boxes))" + ) + assert "std::vector boxes;" in cpp + assert "std::vector boxes;" not in cpp + + +def test_str_contains_udf_infers_bool_return_type(): + cpp = _cpp( + "hasXau() =>\n" + " str.contains(str.upper(syminfo.ticker), \"XAU\")\n" + "isGold = hasXau()\n" + "plot(isGold ? close : open)" + ) + assert "bool hasXau()" in cpp + assert "std::string hasXau()" not in cpp + + +def test_input_source_passed_to_history_udf_is_series_arg(): + cpp = _cpp( + "src = input.source(close, \"Source\")\n" + "lagged(_src, _len) =>\n" + " lag = math.floor((_len - 1) / 2)\n" + " _src + (_src - _src[lag])\n" + "z = lagged(src, 10)\n" + "plot(z)" + ) + assert "Series src" in cpp + assert "lagged_cs0(src, 10)" in cpp or "lagged(src, 10)" in cpp + assert "lagged_cs0(src[0], 10)" not in cpp + assert "lagged(src[0], 10)" not in cpp + + +def test_syminfo_pointvalue_infers_numeric_udf_return(): + cpp = _cpp( + "pointValue = syminfo.pointvalue\n" + "dollarsToPoints(dollars) =>\n" + " dollars / pointValue\n" + "x = dollarsToPoints(100.0)\n" + "plot(x)" + ) + assert "double dollarsToPoints(double dollars)" in cpp + assert "std::string dollarsToPoints" not in cpp + + +def test_timestamp_timezone_variable_uses_tz_overload(): + cpp = _cpp( + "tz = input.string(\"America/New_York\", \"Timezone\")\n" + "nyYear = year(time, tz)\n" + "rangeStart = timestamp(tz, nyYear, 1, 2, 9, 30)\n" + "plot(rangeStart)" + ) + assert "std::string _tz = (tz)" in cpp + assert "int _yr = (tz)" not in cpp + assert "mktime(&t)" in cpp + + +def test_ta_stdev_biased_arg_goes_to_constructor_not_compute(): + cpp = _cpp( + "x = ta.stdev(close, 3, false)\n" + "plot(x)" + ) + assert "ta::StdDev(3, false)" in cpp + assert ".compute(current_bar_.close, false)" not in cpp + assert 
".recompute(current_bar_.close, false)" not in cpp + + +def test_text_align_wrapper_param_infers_string(): + cpp = _cpp( + "var table dash = table.new(position.top_right, 1, 1)\n" + "cell(alignMode) =>\n" + " table.cell(dash, 0, 0, \"x\", text_halign = alignMode)\n" + "cell(text.align_right)\n" + "plot(close)" + ) + assert "cell(std::string alignMode)" in cpp + assert "cell(int alignMode)" not in cpp + + # --------------------------------------------------------------------------- # Round 2: tuple-element type retention + UDF param/return type inference # (jevondijefferson / thulashimohanr blockers) @@ -574,4 +658,3 @@ def test_function_scoped_var_not_in_constructor_init_list(): import re m = re.search(r"GeneratedStrategy\(\)\s*:([^\n]*)", cpp) assert m is None or "c(5)" not in m.group(0) - diff --git a/tests/test_support_checker.py b/tests/test_support_checker.py index f5d0ee7..1470c0f 100644 --- a/tests/test_support_checker.py +++ b/tests/test_support_checker.py @@ -493,6 +493,25 @@ def test_label_geometry_accepted_visual_setter_warns(): assert _errors(PRELUDE + 'lb = label.new(bar_index, high, "x")\nlabel.bogus(lb)\n') +def test_table_cell_visual_wrapper_allows_style_constants(): + src = PRELUDE + """ +cell(table t, int c, int r, string txt, align) => + t.cell(c, r, txt, text_halign = align) + +var table dash = table.new(position.top_right, 1, 1) +cell(dash, 0, 0, "ok", text.align_right) +""" + assert _errors(src) == [] + + +def test_visual_style_constant_still_rejected_in_non_visual_wrapper(): + src = PRELUDE + """ +passthrough(x) => x +v = passthrough(text.align_right) +""" + _expect_error(src, "text.align_right") + + def test_udt_drawing_field_history_rejected(): src = PRELUDE + """ type DrawState From 64ac9ded152dc0812b62f70b0fadcc999681a51e Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Sat, 4 Jul 2026 01:43:12 +0800 Subject: [PATCH 02/15] Fix TA precalc and relative exit tick lowering Guard static TA precalculation so user series aliases that are not 
replayed in precalculate() use the live per-bar TA path instead of producing all-na results. Also emit strategy.exit profit/loss as relative tick offsets for the engine instead of freezing prices from position_entry_price_ before pending entries fill. --- pineforge_codegen/codegen/base.py | 2 +- pineforge_codegen/codegen/emit_top.py | 10 +-- pineforge_codegen/codegen/ta.py | 88 ++++++++++++++++++++++++- pineforge_codegen/codegen/visit_call.py | 15 +++-- tests/test_codegen_validation_fixes.py | 29 ++++++++ 5 files changed, 130 insertions(+), 14 deletions(-) diff --git a/pineforge_codegen/codegen/base.py b/pineforge_codegen/codegen/base.py index de41600..d932e53 100644 --- a/pineforge_codegen/codegen/base.py +++ b/pineforge_codegen/codegen/base.py @@ -1404,7 +1404,7 @@ def generate(self) -> str: # 3. TA members for site in self.ctx.ta_call_sites: lines.append(f" {site.class_name} {site.member_name};") - if getattr(site, "is_static", False): + if self._ta_site_uses_precalc(site): vtype = self._ta_return_type(site) lines.append(f" std::vector<{vtype}> _precalc_{site.member_name};") lines.append(" bool _use_precalc = false;") diff --git a/pineforge_codegen/codegen/emit_top.py b/pineforge_codegen/codegen/emit_top.py index 2c99aa9..3f85e77 100644 --- a/pineforge_codegen/codegen/emit_top.py +++ b/pineforge_codegen/codegen/emit_top.py @@ -1034,7 +1034,7 @@ class member but its initializer was dropped, leaving the member ``na`` lines.append(" }") def _emit_precalculate_and_run(self, lines: list[str]) -> None: - has_static_ta = any(getattr(site, "is_static", False) for site in self.ctx.ta_call_sites) + has_static_ta = any(self._ta_site_uses_precalc(site) for site in self.ctx.ta_call_sites) if not has_static_ta: return @@ -1045,13 +1045,13 @@ def _emit_precalculate_and_run(self, lines: list[str]) -> None: # Resize precalculated vectors for site in self.ctx.ta_call_sites: - if getattr(site, "is_static", False): + if self._ta_site_uses_precalc(site): lines.append(f" 
_precalc_{site.member_name}.resize(n);") # Reset indicators to clean slate lines.append("") for site in self.ctx.ta_call_sites: - if getattr(site, "is_static", False): + if self._ta_site_uses_precalc(site): resolved = [self._resolve_known(a) for a in site.ctor_args] safe_resolved = [] for r in resolved: @@ -1140,7 +1140,7 @@ def _emit_precalculate_and_run(self, lines: list[str]) -> None: self._precalc_loop_active = True try: for site in self.ctx.ta_call_sites: - if getattr(site, "is_static", False): + if self._ta_site_uses_precalc(site): compute_args = self._ta_compute_args_for_site(site) compute_args_bars = compute_args.replace("current_bar_.", "bars[i].") lines.append(f" _precalc_{site.member_name}[i] = {site.member_name}.compute({compute_args_bars});") @@ -1152,7 +1152,7 @@ def _emit_precalculate_and_run(self, lines: list[str]) -> None: # Reset indicators and series for the real backtest run lines.append("") for site in self.ctx.ta_call_sites: - if getattr(site, "is_static", False): + if self._ta_site_uses_precalc(site): resolved = [self._resolve_known(a) for a in site.ctor_args] safe_resolved = [] for r in resolved: diff --git a/pineforge_codegen/codegen/ta.py b/pineforge_codegen/codegen/ta.py index 00a6858..aff5213 100644 --- a/pineforge_codegen/codegen/ta.py +++ b/pineforge_codegen/codegen/ta.py @@ -28,7 +28,9 @@ from typing import TYPE_CHECKING from ..ast_nodes import ( - Assignment, BinOp, ExprStmt, FuncCall, Ternary, UnaryOp, VarDecl, + Assignment, BinOp, BoolLiteral, ColorLiteral, ExprStmt, FuncCall, + Identifier, MemberAccess, NaLiteral, NumberLiteral, StringLiteral, + Subscript, Ternary, TupleLiteral, UnaryOp, VarDecl, ) from .tables import TA_IMPLICIT_APPEND, TA_IMPLICIT_COMPUTE_FULL @@ -221,6 +223,90 @@ def _ta_compute_args_for_site(self, site: "TACallSite") -> str: return "" + # ------------------------------------------------------------------ + # Precalculation safety + # ------------------------------------------------------------------ + + 
_PRECALC_BAR_IDENTIFIERS = { + "open", "high", "low", "close", "volume", + "hl2", "hlc3", "ohlc4", "hlcc4", + "time", "time_close", "bar_index", + } + + def _is_precalc_replayed_source_var(self, name: str) -> bool: + """True for top-level ``x = input.source(...)`` variables replayed in + ``precalculate()``. + + The precompute loop explicitly advances native source series and then + replays those source-input assignments before computing static TA + sites. Other user aliases, even when they are statically derived from + bar data (``src = close`` / ``ha_close = close``), are not replayed + there and must therefore use the normal per-bar TA path.""" + ast = getattr(self.ctx, "ast", None) + for stmt in getattr(ast, "body", ()): + if ( + isinstance(stmt, VarDecl) + and stmt.name == name + and isinstance(stmt.value, FuncCall) + and self._is_source_input(stmt.value) + ): + return True + return False + + def _expr_safe_for_ta_precalc(self, expr) -> bool: + if expr is None: + return True + if isinstance(expr, (NumberLiteral, StringLiteral, BoolLiteral, NaLiteral, ColorLiteral)): + return True + if isinstance(expr, Identifier): + if expr.name in self._PRECALC_BAR_IDENTIFIERS: + return True + if self._is_precalc_replayed_source_var(expr.name): + return True + if expr.name in getattr(self.ctx, "series_vars", set()): + return False + return expr.name in getattr(self, "_static_vars", set()) + if isinstance(expr, MemberAccess): + if isinstance(expr.object, Identifier) and ( + expr.object.name.startswith("input") or expr.object.name in getattr(self, "_enum_defs", {}) + ): + return True + return self._expr_safe_for_ta_precalc(expr.object) + if isinstance(expr, BinOp): + return self._expr_safe_for_ta_precalc(expr.left) and self._expr_safe_for_ta_precalc(expr.right) + if isinstance(expr, UnaryOp): + return self._expr_safe_for_ta_precalc(expr.operand) + if isinstance(expr, Ternary): + return ( + self._expr_safe_for_ta_precalc(expr.condition) + and 
self._expr_safe_for_ta_precalc(expr.true_val) + and self._expr_safe_for_ta_precalc(expr.false_val) + ) + if isinstance(expr, Subscript): + return self._expr_safe_for_ta_precalc(expr.object) and self._expr_safe_for_ta_precalc(expr.index) + if isinstance(expr, TupleLiteral): + return all(self._expr_safe_for_ta_precalc(elem) for elem in expr.elements) + if isinstance(expr, FuncCall): + if isinstance(expr.callee, MemberAccess) and isinstance(expr.callee.object, Identifier): + if expr.callee.object.name in ("math", "str", "color"): + return all(self._expr_safe_for_ta_precalc(arg) for arg in expr.args) + return False + return False + + def _ta_site_uses_precalc(self, site: "TACallSite") -> bool: + """Whether a static TA site can safely read from ``_precalc_*``. + + Static-ness from the analyzer means the expression can be represented + from bar data and constants, but the standalone precompute loop only + replays a narrow subset of per-bar assignments. A user alias such as + ``ha_close = close`` is static in that analyzer sense, yet its Series is + empty during precompute, so ``ta.stdev(ha_close, 20)`` precalculates as + all-``na``. 
Opting that site out preserves correctness; it simply uses + the ordinary stateful TA object during ``on_bar``.""" + if not getattr(site, "is_static", False): + return False + return all(self._expr_safe_for_ta_precalc(arg) for arg in site.compute_args) + def _security_ta_compute_args_for_site( self, sec_id: int, diff --git a/pineforge_codegen/codegen/visit_call.py b/pineforge_codegen/codegen/visit_call.py index 1ce77e1..a0576a6 100644 --- a/pineforge_codegen/codegen/visit_call.py +++ b/pineforge_codegen/codegen/visit_call.py @@ -388,9 +388,10 @@ def _visit_func_call(self, node: FuncCall) -> str: if site is not None: compute_args = self._ta_compute_args_for_site(site) ta_mem = self._ta_member_name(site) - if getattr(self, "_precalc_loop_active", False) and getattr(site, "is_static", False): + uses_precalc = self._ta_site_uses_precalc(site) + if getattr(self, "_precalc_loop_active", False) and uses_precalc: return f"_precalc_{ta_mem}[i]" - if getattr(site, "is_static", False): + if uses_precalc: return f"(_use_precalc ? _precalc_{ta_mem}[bar_index_] : (is_first_tick_ ? {ta_mem}.compute({compute_args}) : {ta_mem}.recompute({compute_args})))" return f"(is_first_tick_ ? {ta_mem}.compute({compute_args}) : {ta_mem}.recompute({compute_args}))" @@ -1277,17 +1278,17 @@ def _visit_strategy_call(self, func_name: str, node: FuncCall) -> str: qty_val = self._visit_expr(qty_n) if qty_n else "na()" comment = self._visit_expr(comment_n) if comment_n is not None else '""' oca_val = self._visit_expr(oca_name_n) if oca_name_n is not None else '""' + profit_ticks = "na()" + loss_ticks = "na()" if profit_n and not limit_n: - ticks = self._visit_expr(profit_n) - limit_val = f"(position_entry_price_ + (signed_position_size() > 0 ? 1.0 : -1.0) * ({ticks}) * syminfo_mintick_)" + profit_ticks = self._visit_expr(profit_n) if loss_n and not stop_n: - ticks = self._visit_expr(loss_n) - stop_val = f"(position_entry_price_ - (signed_position_size() > 0 ? 
1.0 : -1.0) * ({ticks}) * syminfo_mintick_)" + loss_ticks = self._visit_expr(loss_n) return (f"strategy_exit({exit_id}, {from_id}, {limit_val}, {stop_val}, " f"{trail_pts}, {trail_off}, {trail_pr}, {qty_pct}, {comment}, " - f"{qty_val}, {oca_val})") + f"{qty_val}, {oca_val}, {profit_ticks}, {loss_ticks})") close_comment = self._visit_expr(comment_n) if comment_n is not None else '""' return f"strategy_close({exit_id}, {close_comment})" diff --git a/tests/test_codegen_validation_fixes.py b/tests/test_codegen_validation_fixes.py index 9f18f15..e192660 100644 --- a/tests/test_codegen_validation_fixes.py +++ b/tests/test_codegen_validation_fixes.py @@ -374,6 +374,18 @@ def test_ta_stdev_biased_arg_goes_to_constructor_not_compute(): assert ".recompute(current_bar_.close, false)" not in cpp +def test_ta_precalc_skips_user_series_alias_source(): + cpp = _cpp( + "ha_close = close\n" + "bbLen = input.int(20, \"BB Length\")\n" + "dev = ta.stdev(ha_close, bbLen) * 2.0\n" + "plot(dev)" + ) + assert "std::vector _precalc__ta_stdev" not in cpp + assert "_use_precalc ? _precalc__ta_stdev" not in cpp + assert "_ta_stdev_1.compute(ha_close)" in cpp + + def test_text_align_wrapper_param_infers_string(): cpp = _cpp( "var table dash = table.new(position.top_right, 1, 1)\n" @@ -658,3 +670,20 @@ def test_function_scoped_var_not_in_constructor_init_list(): import re m = re.search(r"GeneratedStrategy\(\)\s*:([^\n]*)", cpp) assert m is None or "c(5)" not in m.group(0) + + +def test_strategy_exit_profit_loss_passes_relative_ticks_to_engine(): + # strategy.exit(profit/loss) can be issued while its entry is still pending. + # Codegen must not convert the tick offsets to absolute prices using + # position_entry_price_ at call time, because the actual entry fill may not + # exist until the next bar. 
+ cpp = _cpp( + "if bar_index == 0\n" + " strategy.entry(\"L\", strategy.long)\n" + " strategy.exit(\"X\", \"L\", profit=40, loss=20)\n" + "plot(close)" + ) + assert "position_entry_price_ +" not in cpp + assert "position_entry_price_ -" not in cpp + assert 'strategy_exit(std::string("X"), std::string("L"), na(), na()' in cpp + assert ", 100.0, \"\", na(), \"\", 40, 20);" in cpp From bacc8d79412c377c97059507b3453f6927a3b204 Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Sat, 4 Jul 2026 01:54:44 +0800 Subject: [PATCH 03/15] Fix string type inference for local and UDT fields Teach codegen type inference to remember inline local declaration types outside helper functions and to carry array element TypeSpecs for for-in loop variables. This prevents string concatenation from wrapping existing strings, including UDT string fields and local string labels, in std::to_string; fixes standard compile failures in generic dashboard/label code. --- pineforge_codegen/codegen/base.py | 1 + pineforge_codegen/codegen/types.py | 6 +++++ pineforge_codegen/codegen/visit_stmt.py | 13 ++++++++++- tests/test_codegen_validation_fixes.py | 30 +++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/pineforge_codegen/codegen/base.py b/pineforge_codegen/codegen/base.py index d932e53..3e3ebdd 100644 --- a/pineforge_codegen/codegen/base.py +++ b/pineforge_codegen/codegen/base.py @@ -441,6 +441,7 @@ def __init__(self, ctx: AnalyzerContext) -> None: self._current_func_local_types: dict[str, str] = {} # for-in loop iterator names (must resolve member access, not enum fallback) self._current_loop_vars: set[str] = set() + self._current_loop_var_specs: dict[str, "TypeSpec"] = {} # Track array variables for codegen self._array_vars: set[str] = set() # Track map variables for codegen diff --git a/pineforge_codegen/codegen/types.py b/pineforge_codegen/codegen/types.py index abb166b..440cd63 100644 --- a/pineforge_codegen/codegen/types.py +++ 
b/pineforge_codegen/codegen/types.py @@ -210,6 +210,9 @@ def _type_spec_from_expr(self, node) -> TypeSpec | None: if isinstance(node, StringLiteral): return TypeSpec.primitive("string") if isinstance(node, Identifier): + loop_specs = getattr(self, "_current_loop_var_specs", None) + if loop_specs and node.name in loop_specs: + return loop_specs[node.name] if node.name in self._collection_types: return self._collection_types[node.name] if node.name in self._udt_var_types: @@ -898,6 +901,9 @@ def _infer_type(self, node) -> str: sym_key = f"syminfo.{node.member}" if sym_key in _pf_sigs.SYMINFO_VARIABLES: return PINE_TYPE_TO_CPP.get(_pf_sigs.SYMINFO_VARIABLES[sym_key], "double") + spec = self._type_spec_from_expr(node) + if spec is not None: + return self._type_spec_to_cpp(spec) if isinstance(node, Ternary): tt = self._infer_type(node.true_val) ft = self._infer_type(node.false_val) diff --git a/pineforge_codegen/codegen/visit_stmt.py b/pineforge_codegen/codegen/visit_stmt.py index 43ca387..cc76948 100644 --- a/pineforge_codegen/codegen/visit_stmt.py +++ b/pineforge_codegen/codegen/visit_stmt.py @@ -259,7 +259,7 @@ def _visit_var_decl(self, node: VarDecl, lines: list[str], pad: str) -> None: is_global_member = node.name in self._global_member_vars def remember_local_type(cpp_type: str | None) -> None: - if cpp_type and not is_global_member and node.name in self._current_func_locals: + if cpp_type and not is_global_member: self._current_func_local_types[node.name] = cpp_type # Check if it is a static (non-series) global member variable already evaluated inside _inputs_initialized_ block @@ -792,9 +792,19 @@ def _visit_for_in(self, node, lines: list[str], indent: int) -> None: pad = " " * indent iterable = self._visit_expr(node.iterable) saved_loop = self._current_loop_vars + saved_loop_specs = self._current_loop_var_specs self._current_loop_vars = set(self._current_loop_vars) + self._current_loop_var_specs = dict(self._current_loop_var_specs) + iterable_spec = 
self._type_spec_from_expr(node.iterable) + elem_spec = ( + iterable_spec.element + if iterable_spec is not None and iterable_spec.kind == "array" + else None + ) if node.var: self._current_loop_vars.add(node.var) + if elem_spec is not None: + self._current_loop_var_specs[node.var] = elem_spec if node.vars: for v in node.vars: if v != "_": @@ -814,6 +824,7 @@ def _visit_for_in(self, node, lines: list[str], indent: int) -> None: self._pop_block_var_remap(_blk_saved) lines.append(f"{pad}}}") self._current_loop_vars = saved_loop + self._current_loop_var_specs = saved_loop_specs def _visit_while(self, node: WhileStmt, lines: list[str], indent: int) -> None: pad = " " * indent diff --git a/tests/test_codegen_validation_fixes.py b/tests/test_codegen_validation_fixes.py index e192660..8dda5d3 100644 --- a/tests/test_codegen_validation_fixes.py +++ b/tests/test_codegen_validation_fixes.py @@ -687,3 +687,33 @@ def test_strategy_exit_profit_loss_passes_relative_ticks_to_engine(): assert "position_entry_price_ -" not in cpp assert 'strategy_exit(std::string("X"), std::string("L"), na(), na()' in cpp assert ", 100.0, \"\", na(), \"\", 40, 20);" in cpp + + +def test_string_concat_preserves_top_level_local_string_types(): + cpp = _cpp( + "if barstate.islast\n" + " role_txt = close > open ? \"run\" : \"next\"\n" + " status_icon = close > open ? 
\"ok\" : \" \"\n" + " row_label = status_icon + \"DCA-\" + str.tostring(bar_index) + role_txt\n" + " label.new(bar_index, close, row_label)\n" + "plot(close)" + ) + assert "std::to_string(status_icon)" not in cpp + assert "std::to_string(role_txt)" not in cpp + assert "std::string row_label" in cpp + + +def test_string_concat_preserves_udt_for_in_field_string_type(): + cpp = _cpp( + "type Level\n" + " string name\n" + " float price\n" + "var levels = array.new()\n" + "if bar_index == 0\n" + " array.push(levels, Level.new(\"PDH\", high))\n" + "for lvl in levels\n" + " label.new(bar_index, lvl.price, \"hit \" + lvl.name)\n" + "plot(close)" + ) + assert "std::to_string(lvl.name)" not in cpp + assert 'std::string("hit ") + lvl.name' in cpp From 8567065298dac0b8f2332dfb4713cb3d7c665368 Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Sat, 4 Jul 2026 02:16:16 +0800 Subject: [PATCH 04/15] Normalize TradingView GMT offsets in generated time code Generated hour/day/time-field lambdas and timestamp(timezone, ...) used POSIX TZ directly. Emit calls to the engine timezone normalizer so TradingView-style GMT+/- and UTC+/- strings resolve with the same sign convention as Pine. Updates timezone and timestamp regression tests to pin the normalized emission. 
--- pineforge_codegen/codegen/tables.py | 2 +- pineforge_codegen/codegen/visit_call.py | 2 +- tests/test_codegen_audit_fixes.py | 1 + tests/test_codegen_new.py | 2 ++ tests/test_codegen_validation_fixes.py | 2 +- 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pineforge_codegen/codegen/tables.py b/pineforge_codegen/codegen/tables.py index 243cb9d..5b2d246 100644 --- a/pineforge_codegen/codegen/tables.py +++ b/pineforge_codegen/codegen/tables.py @@ -55,7 +55,7 @@ def tz_time_field_lambda(field_expr: str, ts_arg: str, tz_arg: str) -> str: """ return ( "[&]() -> int { " - f"std::string _tz = ({tz_arg}); " + f"std::string _tz = pineforge::normalize_timezone_for_posix(({tz_arg})); " f"time_t _secs = (time_t)(({ts_arg}) / 1000); " "struct tm tm_buf; " "if (_tz.empty() || _tz == \"UTC\" || _tz == \"Etc/UTC\") { " diff --git a/pineforge_codegen/codegen/visit_call.py b/pineforge_codegen/codegen/visit_call.py index a0576a6..32a7c1c 100644 --- a/pineforge_codegen/codegen/visit_call.py +++ b/pineforge_codegen/codegen/visit_call.py @@ -760,7 +760,7 @@ def _visit_func_call(self, node: FuncCall) -> str: sc = args[6] if len(args) > 6 else "0" return ( f"[&]() -> int64_t {{ " - f"std::string _tz = ({tz}); " + f"std::string _tz = pineforge::normalize_timezone_for_posix(({tz})); " f"int _yr = ({yr}); int _mo = ({mo}); int _dy = ({dy}); " f"int _hr = ({hr}); int _min = ({mn}); int _sc = ({sc}); " f"static thread_local std::string _last_tz; " diff --git a/tests/test_codegen_audit_fixes.py b/tests/test_codegen_audit_fixes.py index 31cba7d..07a364b 100644 --- a/tests/test_codegen_audit_fixes.py +++ b/tests/test_codegen_audit_fixes.py @@ -82,6 +82,7 @@ def test_timestamp_numeric_form_works(): def test_timestamp_tz_form_works(): cpp = _gen('t = timestamp("GMT+2", 2020, 1, 2)\nplot(close)\n') + assert "normalize_timezone_for_posix" in cpp assert "mktime" in cpp diff --git a/tests/test_codegen_new.py b/tests/test_codegen_new.py index 3950fb3..df28396 100644 --- 
a/tests/test_codegen_new.py +++ b/tests/test_codegen_new.py @@ -238,6 +238,7 @@ def test_hour_two_arg_passes_tz(): ) cpp = _generate(src) assert "America/New_York" in cpp + assert "normalize_timezone_for_posix" in cpp # Two-arg form must use localtime_r (with the TZ env mutation) rather # than just gmtime_r — that is the whole point of the tz argument. assert "localtime_r" in cpp @@ -263,6 +264,7 @@ def test_hour_one_arg_uses_syminfo_timezone(): # TV docs. The default ``SymInfo::timezone`` of "UTC" keeps the # cheap gmtime_r path active for crypto. assert "syminfo_.timezone" in cpp + assert "normalize_timezone_for_posix" in cpp # The chart-display TZ slot must NOT leak into the bar-time lambda; # if it ever does, this test catches the regression. assert "chart_timezone_" not in cpp diff --git a/tests/test_codegen_validation_fixes.py b/tests/test_codegen_validation_fixes.py index 8dda5d3..cf58a0c 100644 --- a/tests/test_codegen_validation_fixes.py +++ b/tests/test_codegen_validation_fixes.py @@ -359,7 +359,7 @@ def test_timestamp_timezone_variable_uses_tz_overload(): "rangeStart = timestamp(tz, nyYear, 1, 2, 9, 30)\n" "plot(rangeStart)" ) - assert "std::string _tz = (tz)" in cpp + assert "std::string _tz = pineforge::normalize_timezone_for_posix((tz))" in cpp assert "int _yr = (tz)" not in cpp assert "mktime(&t)" in cpp From 93cdec91190070c24048b2931d60bcc8b2efe6bb Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Sat, 4 Jul 2026 03:01:34 +0800 Subject: [PATCH 05/15] Fix comma statement and typed na lowering Preserve comma-separated simple statements so side-effecting assignments and array calls after the first comma are emitted. Lower namespaced array.fill and array value arguments with typed na/default values instead of skipping or emitting incompatible na() values. Lower Pine v6 bool(x) casts so numeric na maps to false instead of C++ treating NaN as truthy. Refresh the matrix PCA golden after the prior TA-precalc safety change and cover the new validation cases. 
--- pineforge_codegen/codegen/base.py | 2 +- pineforge_codegen/codegen/visit_call.py | 69 +++++++++++++++++++++---- pineforge_codegen/parser.py | 40 ++++++++++++-- tests/golden/matrix_eigen_pca.cpp | 43 --------------- tests/test_codegen_validation_fixes.py | 46 ++++++++++++++++- tests/test_parser.py | 15 ++++++ 6 files changed, 156 insertions(+), 59 deletions(-) diff --git a/pineforge_codegen/codegen/base.py b/pineforge_codegen/codegen/base.py index 3e3ebdd..de9a0b3 100644 --- a/pineforge_codegen/codegen/base.py +++ b/pineforge_codegen/codegen/base.py @@ -1790,7 +1790,7 @@ def _is_skip_expr(self, node) -> bool: if self._is_chart_point_callee(node.callee): return False func_name, namespace = self._resolve_callee(node.callee) - if func_name in SKIP_FUNC_NAMES: + if namespace is None and func_name in SKIP_FUNC_NAMES: return True if namespace in SKIP_NAMESPACES: return True diff --git a/pineforge_codegen/codegen/visit_call.py b/pineforge_codegen/codegen/visit_call.py index 32a7c1c..65a076c 100644 --- a/pineforge_codegen/codegen/visit_call.py +++ b/pineforge_codegen/codegen/visit_call.py @@ -210,6 +210,44 @@ class CallVisitor: # Function-call dispatch # ------------------------------------------------------------------ + def _array_init_value_expr(self, elem_spec: TypeSpec | None, value_node) -> str: + if isinstance(value_node, NaLiteral): + if elem_spec is not None and elem_spec.kind == "udt": + return self._default_for_spec(elem_spec) + cpp_type = self._type_spec_to_cpp(elem_spec) + if cpp_type in ("double", "int", "int64_t", "bool", "std::string"): + return f"na<{cpp_type}>()" + return self._default_for_spec(elem_spec) + return self._visit_expr(value_node) + + def _array_method_args( + self, method: str, arg_nodes: list, spec: TypeSpec | None, + ) -> list[str]: + elem_spec = ( + spec.element + if spec is not None and spec.kind == "array" and spec.element is not None + else TypeSpec.primitive("float") + ) + value_arg_indexes = { + "set": {1}, + "push": {0}, + 
"unshift": {0}, + "insert": {1}, + "fill": {0}, + "includes": {0}, + "indexof": {0}, + "lastindexof": {0}, + "binary_search": {0}, + "binary_search_leftmost": {0}, + "binary_search_rightmost": {0}, + }.get(method, set()) + return [ + self._array_init_value_expr(elem_spec, arg) + if idx in value_arg_indexes + else self._visit_expr(arg) + for idx, arg in enumerate(arg_nodes) + ] + def _visit_func_call(self, node: FuncCall) -> str: callee = node.callee if isinstance(callee, MemberAccess): @@ -271,7 +309,9 @@ def _visit_func_call(self, node: FuncCall) -> str: meth = callee.member raw_args = [self._visit_expr(a) for a in node.args] if recv_spec is not None and recv_spec.kind == "array" and meth in ARRAY_METHODS: - return self._array_method_expr(recv, meth, raw_args, recv_spec) + return self._array_method_expr( + recv, meth, self._array_method_args(meth, node.args, recv_spec), recv_spec + ) if recv_spec is not None and recv_spec.kind == "map" and meth in MAP_METHODS: return self._map_method_expr(recv, meth, raw_args, recv_spec) args = ", ".join(raw_args) @@ -295,7 +335,9 @@ def _visit_func_call(self, node: FuncCall) -> str: return self._map_method_expr(m, meth_raw, margs, self._map_spec_for_name(oname)) if oname in self._array_vars and meth_raw in ARRAY_METHODS: arr = self._safe_name(oname) - margs = [self._visit_expr(a) for a in node.args] + margs = self._array_method_args( + meth_raw, node.args, self._array_spec_for_name(oname) + ) return self._array_method_expr(arr, meth_raw, margs, self._array_spec_for_name(oname)) if oname in self._matrix_specs and meth_raw in MATRIX_METHODS: arr = self._safe_name(oname) @@ -453,8 +495,9 @@ def _visit_func_call(self, node: FuncCall) -> str: # Array method syntax: arr.push(val) where namespace is the array variable name if namespace is not None and namespace in self._array_vars and func_name in ARRAY_METHODS: arr = self._safe_name(namespace) - args = [self._visit_expr(a) for a in node.args] - return self._array_method_expr(arr, 
func_name, args, self._array_spec_for_name(namespace)) + spec = self._array_spec_for_name(namespace) + args = self._array_method_args(func_name, node.args, spec) + return self._array_method_expr(arr, func_name, args, spec) # Array operations — emit proper C++ vector operations if namespace == "array": @@ -466,11 +509,7 @@ def _visit_func_call(self, node: FuncCall) -> str: if node.args: size_arg = self._visit_expr(node.args[0]) if len(node.args) > 1: - init_val = ( - init_default - if isinstance(node.args[1], NaLiteral) - else self._visit_expr(node.args[1]) - ) + init_val = self._array_init_value_expr(elem_spec, node.args[1]) else: init_val = init_default return f"{cpp_type}((size_t)({size_arg}), {init_val})" @@ -482,8 +521,8 @@ def _visit_func_call(self, node: FuncCall) -> str: # Method calls: array.method(arr, args...) if func_name in ARRAY_METHODS and node.args: arr = self._visit_expr(node.args[0]) - rest = [self._visit_expr(a) for a in node.args[1:]] spec = self._type_spec_from_expr(node.args[0]) + rest = self._array_method_args(func_name, node.args[1:], spec) return self._array_method_expr(arr, func_name, rest, spec) return "0" @@ -852,7 +891,15 @@ def _visit_func_call(self, node: FuncCall) -> str: if func_name == "float" and namespace is None and node.args: return f"(double)({self._visit_expr(node.args[0])})" if func_name == "bool" and namespace is None and node.args: - return f"(bool)({self._visit_expr(node.args[0])})" + # Pine v6 bools are two-state. Explicit bool(int/float) treats na + # like false, while a raw C++ cast would make NaN truthy. + x = self._visit_expr(node.args[0]) + return ( + f"[&](){{ auto _pf_v = ({x}); " + f"using _pf_t = std::decay_t; " + f"if constexpr (std::is_same_v<_pf_t, bool>) {{ return _pf_v; }} " + f"else {{ return is_na(_pf_v) ? 
false : (bool)_pf_v; }} }}()" + ) if func_name == "string" and namespace is None and node.args: # Pine string(x) cast — same emission as str.tostring(x), with # string passthrough and TV-style "true"/"false" for bools diff --git a/pineforge_codegen/parser.py b/pineforge_codegen/parser.py index 74cbbd6..6d1a957 100644 --- a/pineforge_codegen/parser.py +++ b/pineforge_codegen/parser.py @@ -166,6 +166,28 @@ def _recover(self) -> None: # ------------------------------------------------------------------ def _parse_statement(self): + stmt = self._parse_single_statement() + if not self._check(TokenType.COMMA): + return stmt + + stmts: list = [] + self._extend_statement_list(stmts, stmt) + while self._match(TokenType.COMMA): + if self._check(TokenType.NEWLINE) or self._check(TokenType.DEDENT) or self._at_end(): + break + self._extend_statement_list(stmts, self._parse_single_statement()) + return stmts + + @staticmethod + def _extend_statement_list(stmts: list, stmt) -> None: + if stmt is None: + return + if isinstance(stmt, list): + stmts.extend(stmt) + else: + stmts.append(stmt) + + def _parse_single_statement(self): cur = self._current() # Control flow keywords @@ -374,12 +396,24 @@ def _parse_var_decl(self) -> VarDecl | list: first = VarDecl(name=name_tok.value, value=value) self._set_loc(first, start_tok) - # Check for comma-separated additional declarations: x=1, y=2, z=3 - if not self._check(TokenType.COMMA): + # Check for comma-separated additional declarations: x=1, y=2, z=3. + # Other comma-separated simple statements (``a := 1, b := 2`` or + # ``array.fill(a, na), array.set(a, 0, 1)``) are handled by the + # statement wrapper above, so do not greedily consume their comma. 
+ if not ( + self._check(TokenType.COMMA) + and self._peek().type == TokenType.IDENT + and self._peek(2).type == TokenType.EQUALS + ): return first decls = [first] - while self._match(TokenType.COMMA): + while ( + self._check(TokenType.COMMA) + and self._peek().type == TokenType.IDENT + and self._peek(2).type == TokenType.EQUALS + ): + self._advance() st = self._current() n = self._consume(TokenType.IDENT) self._consume(TokenType.EQUALS) diff --git a/tests/golden/matrix_eigen_pca.cpp b/tests/golden/matrix_eigen_pca.cpp index b7ab028..9b2de1f 100644 --- a/tests/golden/matrix_eigen_pca.cpp +++ b/tests/golden/matrix_eigen_pca.cpp @@ -96,9 +96,7 @@ static inline std::string _pf_derive_country(const std::string& tickerid) { class GeneratedStrategy : public BacktestEngine { public: ta::SMA _ta_sma_1; - std::vector _precalc__ta_sma_1; ta::SMA _ta_sma_2; - std::vector _precalc__ta_sma_2; ta::SMA _ta_sma_3; ta::SMA _ta_sma_4; ta::SMA _ta_sma_5; @@ -200,47 +198,6 @@ class GeneratedStrategy : public BacktestEngine { } } - void precalculate(const Bar* bars, int n) { - _use_precalc = false; - if (n <= 0 || bars == nullptr) return; - - _precalc__ta_sma_1.resize(n); - _precalc__ta_sma_2.resize(n); - - _ta_sma_1 = ta::SMA(14); - _ta_sma_2 = ta::SMA(14); - - - for (int i = 0; i < n; ++i) { - _precalc__ta_sma_1[i] = _ta_sma_1.compute(v1); - _precalc__ta_sma_2[i] = _ta_sma_2.compute(v2); - } - - _ta_sma_1 = ta::SMA(14); - _ta_sma_2 = ta::SMA(14); - - _use_precalc = true; - } - - void run(const Bar* bars, int n) { - precalculate(bars, n); - BacktestEngine::run(bars, n); - } - - void run(const Bar* input_bars, int n_input, - const std::string& input_tf, - const std::string& script_tf, - bool bar_magnifier = false, - int magnifier_samples = 4, - MagnifierDistribution magnifier_dist = MagnifierDistribution::ENDPOINTS) { - bool needs_dynamic = bar_magnifier || !input_tf.empty() || !script_tf.empty(); - if (needs_dynamic) { - _use_precalc = false; - } else { - precalculate(input_bars, 
n_input); - } - BacktestEngine::run(input_bars, n_input, input_tf, script_tf, bar_magnifier, magnifier_samples, magnifier_dist); - } }; diff --git a/tests/test_codegen_validation_fixes.py b/tests/test_codegen_validation_fixes.py index cf58a0c..114a209 100644 --- a/tests/test_codegen_validation_fixes.py +++ b/tests/test_codegen_validation_fixes.py @@ -1,6 +1,6 @@ """Regression tests for codegen bugs found by pinescript-scrapper validation. -Covers five fix families: +Covers six fix families: 1. drawing-handle ``na`` reset/assignment (Box{}/Line{}/... not na()), plus typed ``na`` for string/int/bool declaration init. 2. void drawing setter used as a UDF's last expression / if-branch value. @@ -9,6 +9,8 @@ 4. parser handling of ``T[]`` array-typed function parameters (``float[] arr``, ``line[] ln``) — previously the whole function was dropped. 5. typed drawing array constructors ``array.new_line/box/label/linefill``. + 6. Pine v6 bool casts that must treat ``na`` as ``false`` instead of C++'s + truthy NaN conversion. """ from pineforge_codegen import transpile @@ -314,6 +316,48 @@ def test_untyped_var_drawing_array_constructor_emits_typed_member(): assert "std::vector boxes;" not in cpp +def test_comma_separated_statements_and_array_fill_emit_all_side_effects(): + cpp = _cpp( + "var float a = na\n" + "var float b = na\n" + "var float[] xs = array.new_float(3, na)\n" + "var int[] ys = array.new_int(2, na)\n" + "var label[] lbs = array.new_label(2, na)\n" + "if true\n" + " a := 1, b := 2\n" + " array.fill(xs, na), array.set(xs, 1, 7)\n" + " array.fill(ys, na), ys.set(1, na)\n" + " array.fill(lbs, na)\n" + "plot(a + b + array.get(xs, 1))" + ) + assert "a = 1;" in cpp + assert "b = 2;" in cpp + assert "xs = std::vector((size_t)(3), na());" in cpp + assert "ys = std::vector((size_t)(2), na());" in cpp + assert "lbs = std::vector