def build_prompt(self, current_field: str, field_type: type = str):
    """
    Build the extraction prompt for a single target field.

    The template is read from ``prompt.txt`` in the same directory as this
    module and filled with three placeholders: ``field`` (the field name),
    ``type`` ("boolean" when ``field_type`` is ``bool``, otherwise "string")
    and ``text`` (``self._transcript_text``).

    For ``bool`` fields an extra instruction is appended that tells the LLM
    to answer with the literal word True or False only, with no fuzzy values
    such as 'yes' or '1'.

    @params:
        current_field -> the name of the JSON field to extract.
        field_type    -> the expected Python type (e.g. str, bool).
    @returns: the formatted prompt string.
    """
    prompt_path = os.path.join(os.path.dirname(__file__), "prompt.txt")
    # Read the template as UTF-8 explicitly: the platform default encoding
    # (e.g. a legacy code page) could mis-decode a non-ASCII template.
    with open(prompt_path, "r", encoding="utf-8") as f:
        template = f.read()

    is_boolean = field_type is bool

    # Format once; the boolean-only instruction is appended afterwards so the
    # two field kinds cannot drift apart in how the template is filled.
    prompt = template.format(
        field=current_field,
        type="boolean" if is_boolean else "string",
        text=self._transcript_text,
    )

    if is_boolean:
        prompt += (
            "\nIMPORTANT: This field is a boolean. "
            "You MUST respond with ONLY the literal word True or False. "
            "Do not use 'yes', 'no', '1', '0', or any other value."
        )

    return prompt
def input_fields(num_fields: int):
    """
    Interactively collect ``num_fields`` field descriptions from stdin.

    Prompts once per field, numbered from 1, and returns the answers in the
    order they were entered. Returns an empty list when ``num_fields`` is 0.
    """
    return [
        input(f"Enter description for field {i + 1}: ")
        for i in range(num_fields)
    ]


def run_pdf_fill_process(
    user_input: str,
    definitions: Union[list, dict],
    pdf_form_path: Union[str, os.PathLike],
):
    """
    Run the extraction + PDF filling pipeline for one request.

    According to the original author's note this is the entry point called by
    the frontend server. It validates the template path, delegates the work
    to ``Controller.fill_form`` and reports progress on stdout.

    @params:
        user_input    -> raw text the field values are extracted from.
        definitions   -> field definitions forwarded unchanged as ``fields``.
                         NOTE(review): elsewhere in this change the field
                         spec is a dict of name -> type, so both list and
                         dict are accepted here; confirm against Controller.
        pdf_form_path -> path to the PDF template (str or os.PathLike).
    @returns: whatever ``Controller.fill_form`` returns (reported as the
              output location), or None when the template file is missing.
    @raises: any exception raised during filling is logged and re-raised so
             the caller can handle it.
    """

    print("[1] Received request from frontend.")
    print(f"[2] PDF template path: {pdf_form_path}")

    # Normalize Path/PathLike to a plain string for downstream code
    pdf_form_path = os.fspath(pdf_form_path)

    # isfile (not exists): a directory at that path is not a usable template
    # and would otherwise only fail later, deep inside the filling code.
    if not os.path.isfile(pdf_form_path):
        print(f"Error: PDF template not found at {pdf_form_path}")
        return None  # Or raise an exception

    print("[3] Starting extraction and PDF filling process...")
    try:
        controller = Controller()
        output_name = controller.fill_form(
            user_input=user_input,
            fields=definitions,
            pdf_form_path=pdf_form_path,
        )

        print("\n----------------------------------")
        print("✅ Process Complete.")
        print(f"Output saved to: {output_name}")

        return output_name

    except Exception as e:
        print(f"An error occurred during PDF generation: {e}")
        # Bare raise re-raises the active exception with its original
        # traceback so the frontend can handle it.
        raise