fireform-core · Dotify71 · Mar 31, 2026 · Mar 31, 2026 · May 15, 2026 · May 15, 2026
diff --git a/src/filler.py b/src/filler.py
@@ -39,8 +39,35 @@ def fill_form(self, pdf_form: str, llm: LLM):
                 for annot in sorted_annots:
                     if annot.Subtype == "/Widget" and annot.T:
                         if i < len(answers_list):
-                            annot.V = f"{answers_list[i]}"
-                            annot.AP = None
+                            answer = answers_list[i]
+
+                            # Check if the field type is a Button (Checkbox/Radio)
+                            field_type = annot.FT if annot.FT else (annot.Parent.FT if annot.Parent else None)
+                            if str(field_type) == "/Btn":
+                                # Boolean answers are expected to arrive as Python bool. Only an
+                                # explicit True activates the button; False, None or any non-bool
+                                # value leaves it off, so no fuzzy string matching is needed.
+                                is_truthy = isinstance(answer, bool) and answer
+
+                                # Find the 'ON' state from the appearance dictionary
+                                on_state = "/Yes"  # Default assumption
+                                if annot.AP and annot.AP.N:
+                                    keys = [k for k in annot.AP.N.keys() if k != "/Off"]
+                                    if keys:
+                                        on_state = keys[0]
+
+                                if is_truthy:
+                                    from pdfrw import PdfName
+                                    annot.V = PdfName(on_state.strip("/"))
+                                    annot.AS = PdfName(on_state.strip("/"))
+                                else:
+                                    from pdfrw import PdfName
+                                    annot.V = PdfName("Off")
+                                    annot.AS = PdfName("Off")
+                            else:
+                                annot.V = f"{answer}"
+                                annot.AP = None
+
                             i += 1
                         else:
                             # Stop if we run out of answers

diff --git a/src/llm.py b/src/llm.py
@@ -10,15 +10,31 @@ def __init__(self, transcript_text: str=None, target_fields: list=None, json_dic
         self._target_fields = target_fields
         self._json = json_dict if json_dict is not None else {}
 
-    def build_prompt(self, current_field: str, current_type: str = "string"):
+    def build_prompt(self, current_field: str, field_type: type = str):
         """
-        This method is in charge of the prompt engineering. It creates a specific prompt for each target field.
-        @params: current_field -> represents the current element of the json that is being prompted.
-        @params: current_type  -> hint to the LLM about the expected value shape (date, number, etc.).
+        Build the extraction prompt for one target field from prompt.txt.
+
+        For `bool` fields an extra instruction is appended telling the LLM to
+        answer with only the literal word True or False.
+
+        @params:
+            current_field -> the name of the JSON field to extract.
+            field_type    -> the expected Python type (e.g. str, bool); a plain
+                             string hint such as "date" is passed through as-is.
         """
         prompt_path = os.path.join(os.path.dirname(__file__), "prompt.txt")
         with open(prompt_path, "r") as f:
             template = f.read()
+        # A plain-string hint (the previous calling convention) is kept verbatim.
+        string_hint = field_type if isinstance(field_type, str) else "string"
+        current_type = "boolean" if field_type is bool else string_hint
+        prompt = template.format(field=current_field, type=current_type, text=self._transcript_text)
+        if field_type is bool:
+            prompt += (
+                "\nIMPORTANT: This field is a boolean. "
+                "You MUST respond with ONLY the literal word True or False. "
+                "Do not use 'yes', 'no', '1', '0', or any other value."
+            )
 
-        return template.format(field=current_field, type=current_type, text=self._transcript_text)
+        return prompt
 
@@ -27,8 +43,9 @@ def main_loop(self):
         max_retries = 3
 
         total_fields = len(self._target_fields)
-        for i, (field, field_type) in enumerate(self._target_fields.items(), 1):
-            prompt = self.build_prompt(field, field_type if isinstance(field_type, str) else "string")
+        for i, (field, field_val) in enumerate(self._target_fields.items(), 1):
+            field_type = field_val if isinstance(field_val, type) else str
+            prompt = self.build_prompt(field, field_type=field_type)
             ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
             ollama_url = f"{ollama_host}/api/generate"
 
@@ -74,15 +91,35 @@ def main_loop(self):
 
     def add_response_to_json(self, field: str, value: str):
         """
-        this method adds the following value under the specified field,
-        or under a new field if the field doesn't exist, to the json dict
+        Adds the LLM response under the specified field in the JSON dict.
+
+        If the field type in _target_fields is `bool`, the response is strictly
+        coerced: only the literal strings 'True' and 'False' (case-insensitive)
+        are accepted. Any other value is treated as None (unanswered).
         """
         value = value.strip().replace('"', "")
         parsed_value = None
 
-        if value != "-1":
-            parsed_value = value
+        # Determine expected type for this field
+        field_type = self._target_fields.get(field) if isinstance(self._target_fields, dict) else str
+        if not isinstance(field_type, type):
+            field_type = str
+
+        if field_type is bool:
+            # Strictly enforce True/False — no fuzzy matching
+            if value.lower() == "true":
+                parsed_value = True
+            elif value.lower() == "false":
+                parsed_value = False
+            else:
+                print(f"[WARN]: Boolean field '{field}' received unexpected value '{value}'. Defaulting to None.")
+                parsed_value = None
+        else:
+            if value != "-1":
+                parsed_value = value
 
+            if ";" in value:
+                parsed_value = self.handle_plural_values(value)
         if field in self._json.keys():
             self._json[field].append(parsed_value)
         else:

diff --git a/src/main.py b/src/main.py
@@ -1,4 +1,6 @@
+from typing import Union
 import os
+
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
 # Monkey patch rfdetr to force CPU usage on Mac Silicon / Docker
@@ -12,22 +14,68 @@ def patched_ensure(model_ctx):
 except ImportError:
     pass
 
-from commonforms import prepare_form
+from commonforms import prepare_form 
 from pypdf import PdfReader
 from controller import Controller
 
+def input_fields(num_fields: int):
+    """Prompt on stdin for `num_fields` field descriptions and return them as a list."""
+    return [
+        input(f"Enter description for field {position}: ")
+        for position in range(1, num_fields + 1)
+    ]
+
+def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Union[str, os.PathLike]):
+    """
+    Fill a PDF template from raw input; this is the entry point called by the frontend server.
+
+    @params:
+        user_input    -> raw text forwarded to Controller.fill_form as `user_input`.
+        definitions   -> field definitions forwarded to Controller.fill_form as `fields`.
+        pdf_form_path -> location of the PDF template (str or os.PathLike).
+    @returns: the value returned by Controller.fill_form, or None if the template is missing.
+    @raises: any exception raised during generation is printed and then re-raised.
+    """
+    print("[1] Received request from frontend.")
+    print(f"[2] PDF template path: {pdf_form_path}")
+
+    # Normalize Path/PathLike to a plain string for downstream code
+    pdf_form_path = os.fspath(pdf_form_path)
+
+    if not os.path.exists(pdf_form_path):
+        print(f"Error: PDF template not found at {pdf_form_path}")
+        return None
+
+    print("[3] Starting extraction and PDF filling process...")
+    try:
+        controller = Controller()
+        output_name = controller.fill_form(
+            user_input=user_input,
+            fields=definitions,
+            pdf_form_path=pdf_form_path,
+        )
+        print("\n----------------------------------")
+        print("✅ Process Complete.")
+        print(f"Output saved to: {output_name}")
+        return output_name
+    except Exception as e:
+        print(f"An error occurred during PDF generation: {e}")
+        raise  # re-raise the active exception unchanged so the frontend can handle it
 if __name__ == "__main__":
     file = "./src/inputs/file.pdf"
     user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
-    fields = [
-        "Employee's name",
-        "Employee's job title",
-        "Employee's department supervisor",
-        "Employee's phone number",
-        "Employee's email",
-        "Signature",
-        "Date",
-    ]
+    # Fields dict maps each field name to its expected Python type.
+    # Use `bool` for checkbox/radio fields so the LLM is instructed to
+    # return exactly True or False instead of fuzzy strings like "yes".
+    fields = {
+        "Employee's name": str,
+        "Employee's job title": str,
+        "Employee's department supervisor": str,
+        "Employee's phone number": str,
+        "Employee's email": str,
+        "Signature": str,
+        "Date": str,
+    }
     prepared_pdf = "temp_outfile.pdf"
     prepare_form(file, prepared_pdf)