Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions src/filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,35 @@ def fill_form(self, pdf_form: str, llm: LLM):
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
if i < len(answers_list):
annot.V = f"{answers_list[i]}"
annot.AP = None
answer = answers_list[i]

# Check if the field type is a Button (Checkbox/Radio)
field_type = annot.FT if annot.FT else (annot.Parent.FT if annot.Parent else None)
if str(field_type) == "/Btn":
# The LLM pipeline guarantees Python bool for boolean fields.
# We check isinstance(answer, bool) so only an explicit True
# activates the button — no fuzzy string matching needed.
is_truthy = isinstance(answer, bool) and answer

# Find the 'ON' state from the appearance dictionary
on_state = "/Yes" # Default assumption
if annot.AP and annot.AP.N:
keys = [k for k in annot.AP.N.keys() if k != "/Off"]
if keys:
on_state = keys[0]

if is_truthy:
from pdfrw import PdfName
annot.V = PdfName(on_state.strip("/"))
annot.AS = PdfName(on_state.strip("/"))
else:
from pdfrw import PdfName
annot.V = PdfName("Off")
annot.AS = PdfName("Off")
else:
annot.V = f"{answer}"
annot.AP = None

i += 1
else:
# Stop if we run out of answers
Expand Down
57 changes: 47 additions & 10 deletions src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,31 @@ def __init__(self, transcript_text: str=None, target_fields: list=None, json_dic
self._target_fields = target_fields
self._json = json_dict if json_dict is not None else {}

def build_prompt(self, current_field: str, current_type: str = "string"):
def build_prompt(self, current_field: str, field_type: type = str):
"""
This method is in charge of the prompt engineering. It creates a specific prompt for each target field.
@params: current_field -> represents the current element of the json that is being prompted.
@params: current_type -> hint to the LLM about the expected value shape (date, number, etc.).
This method is in charge of the prompt engineering. It creates a specific prompt
for each target field, taking into account the expected field type.

If the field type is `bool`, the LLM is explicitly instructed to return only
the literal string `True` or `False` — no fuzzy values like 'yes' or '1'.

@params:
current_field -> the name of the JSON field to extract.
field_type -> the expected Python type (e.g. str, bool).
"""
prompt_path = os.path.join(os.path.dirname(__file__), "prompt.txt")
with open(prompt_path, "r") as f:
template = f.read()

current_type = "boolean" if field_type is bool else "string"

if field_type is bool:
bool_instruction = (
"\nIMPORTANT: This field is a boolean. "
"You MUST respond with ONLY the literal word True or False. "
"Do not use 'yes', 'no', '1', '0', or any other value."
)
return template.format(field=current_field, type=current_type, text=self._transcript_text) + bool_instruction

return template.format(field=current_field, type=current_type, text=self._transcript_text)

Expand All @@ -27,8 +43,9 @@ def main_loop(self):
max_retries = 3

total_fields = len(self._target_fields)
for i, (field, field_type) in enumerate(self._target_fields.items(), 1):
prompt = self.build_prompt(field, field_type if isinstance(field_type, str) else "string")
for i, (field, field_val) in enumerate(self._target_fields.items(), 1):
field_type = field_val if isinstance(field_val, type) else str
prompt = self.build_prompt(field, field_type=field_type)
ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
ollama_url = f"{ollama_host}/api/generate"

Expand Down Expand Up @@ -74,15 +91,35 @@ def main_loop(self):

def add_response_to_json(self, field: str, value: str):
"""
this method adds the following value under the specified field,
or under a new field if the field doesn't exist, to the json dict
Adds the LLM response under the specified field in the JSON dict.

If the field type in _target_fields is `bool`, the response is strictly
coerced: only the literal strings 'True' and 'False' (case-insensitive)
are accepted. Any other value is treated as None (unanswered).
"""
value = value.strip().replace('"', "")
parsed_value = None

if value != "-1":
parsed_value = value
# Determine expected type for this field
field_type = self._target_fields.get(field) if isinstance(self._target_fields, dict) else str
if not isinstance(field_type, type):
field_type = str

if field_type is bool:
# Strictly enforce True/False — no fuzzy matching
if value.lower() == "true":
parsed_value = True
elif value.lower() == "false":
parsed_value = False
else:
print(f"[WARN]: Boolean field '{field}' received unexpected value '{value}'. Defaulting to None.")
parsed_value = None
else:
if value != "-1":
parsed_value = value

if ";" in value:
parsed_value = self.handle_plural_values(value)
if field in self._json.keys():
self._json[field].append(parsed_value)
else:
Expand Down
68 changes: 58 additions & 10 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Union
import os

os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Monkey patch rfdetr to force CPU usage on Mac Silicon / Docker
Expand All @@ -12,22 +14,68 @@ def patched_ensure(model_ctx):
except ImportError:
pass

from commonforms import prepare_form
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller

def input_fields(num_fields: int) -> list:
    """
    Interactively collect one free-text description per form field.

    @params:
        num_fields -> how many descriptions to ask for. Zero or a negative
                      count asks for nothing and yields an empty list.
    @returns: the descriptions exactly as typed, in prompt order.
    """
    # Prompts are numbered from 1 for the user; range() itself is 0-based.
    return [
        input(f"Enter description for field {i + 1}: ")
        for i in range(num_fields)
    ]

def run_pdf_fill_process(
    user_input: str,
    definitions: Union[list, dict],
    pdf_form_path: Union[str, os.PathLike],
):
    """
    This function is called by the frontend server.
    It receives the raw data, runs the PDF filling logic,
    and returns the path to the newly created file.

    @params:
        user_input    -> the free-text input the field values are extracted from.
        definitions   -> the field definitions, forwarded untouched to
                         Controller.fill_form as `fields`. A list of field
                         names or a dict of field name -> expected Python
                         type are both accepted here.
                         NOTE(review): confirm which shapes Controller supports.
        pdf_form_path -> path to the PDF template (str or any os.PathLike).
    @returns: whatever Controller.fill_form returns (reported as the output
              file), or None when the template does not exist.
    @raises: any exception raised while filling is logged and re-raised
             unchanged so the caller can handle it.
    """

    print("[1] Received request from frontend.")
    print(f"[2] PDF template path: {pdf_form_path}")

    # Normalize Path/PathLike to a plain string for downstream code
    pdf_form_path = os.fspath(pdf_form_path)

    # A missing template is reported and signalled with None rather than an
    # exception; existing callers rely on that contract.
    if not os.path.exists(pdf_form_path):
        print(f"Error: PDF template not found at {pdf_form_path}")
        return None

    print("[3] Starting extraction and PDF filling process...")
    try:
        controller = Controller()
        output_name = controller.fill_form(
            user_input=user_input,
            fields=definitions,
            pdf_form_path=pdf_form_path,
        )

        print("\n----------------------------------")
        print("✅ Process Complete.")
        print(f"Output saved to: {output_name}")

        return output_name

    except Exception as e:
        print(f"An error occurred during PDF generation: {e}")
        # Bare `raise` re-raises the active exception with its original
        # traceback so the frontend can handle it.
        raise
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
fields = [
"Employee's name",
"Employee's job title",
"Employee's department supervisor",
"Employee's phone number",
"Employee's email",
"Signature",
"Date",
]
# Fields dict maps each field name to its expected Python type.
# Use `bool` for checkbox/radio fields so the LLM is instructed to
# return exactly True or False instead of fuzzy strings like "yes".
fields = {
"Employee's name": str,
"Employee's job title": str,
"Employee's department supervisor": str,
"Employee's phone number": str,
"Employee's email": str,
"Signature": str,
"Date": str,
}
prepared_pdf = "temp_outfile.pdf"
prepare_form(file, prepared_pdf)

Expand Down