# %%
import re
from decimal import Decimal
from pathlib import Path

import pandas as pd
# Root directory that is searched for "fy*/ledger/*.ldg" ledger files.
LEDGER_ROOT = Path("/home/jovyan/work/budget/fedora-budget")
# Header line of a dated transaction. Ledger journals write dates with '/'
# as well as '-', so '-', '/' and '.' are all accepted as separators.
_DATED_TXN_RE = re.compile(r"^\d{4}[-/.]\d{1,2}[-/.]\d{1,2}")
# Separator that introduces an inline ';' comment on a posting line.
_INLINE_COMMENT_RE = re.compile(r"\s*;\s*")
# "<account>  <amount>" posting. The sign may appear before or after the
# '$' symbol ("-$100" and "$-100" are both accepted).
_POSTING_RE = re.compile(r"^([A-Za-z0-9:\-\s&]+?)\s+(-?\$?-?\d[\d,\.]*)$")


def parse_budget_sections(text, fy):
    """
    Parse '~ every ...' periodic budget sections from a ledger file.

    A section starts at a line beginning with '~' and ends at the next '~'
    line, at the next dated transaction header, or at end of input.  Inside
    a section every non-comment line becomes one row; lines of the form
    "<account>  <amount>" carry the parsed amount, any other line is kept
    as a zero-amount marker row.

    The first account with a positive amount defines the section's parent
    budget (the part of the account name before the first ':').  Rows that
    appear before that posting keep ``parent_budget=None``.  When a section
    has a parent and its amounts do not sum to zero, a synthetic
    "<parent>:Unallocated (computed)" row holding the negated sum is
    emitted immediately BEFORE that section's own rows.

    Args:
        text: Full contents of a ledger file.
        fy: Fiscal-year label copied verbatim into every row.

    Returns:
        A list of dicts with the keys ``fy``, ``frequency``,
        ``parent_budget``, ``account``, ``amount_raw``, ``amount_num`` and
        ``is_computed_unallocated``.  Empty when no section is found.
    """
    entries = []
    current_freq = None
    in_budget = False
    current_parent = None
    current_block = []

    def flush_block():
        """Emit the pending section, preceded by its computed remainder."""
        nonlocal current_block
        if not current_block:
            return
        # Sum in decimal: adding binary floats (0.10 + 0.20 - 0.30) leaves
        # ~1e-17 of noise, which would make a balanced section look
        # unbalanced.  str() yields the shortest repr that round-trips the
        # float, so the Decimal matches the amount as written.
        alloc_total = sum(
            Decimal(str(row["amount_num"])) for row in current_block
        )
        # Only reconcile if a parent exists and the section is unbalanced.
        if current_parent and alloc_total != 0:
            entries.append({
                "fy": fy,
                "frequency": current_freq,
                "parent_budget": current_parent,
                "account": f"{current_parent}:Unallocated (computed)",
                "amount_raw": None,
                "amount_num": float(-alloc_total),
                "is_computed_unallocated": True,
            })
        entries.extend(current_block)
        current_block = []

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # A '~' line opens a new budget section (and closes any open one).
        if line.startswith("~"):
            flush_block()
            current_freq = line
            in_budget = True
            current_parent = None
            continue

        # A dated transaction header closes the open section.
        if _DATED_TXN_RE.match(line):
            flush_block()
            in_budget = False
            current_parent = None
            continue

        # Ignore everything outside a section, and whole-line comments.
        if not in_budget or line.startswith(";"):
            continue

        # Strip inline comments.
        line = _INLINE_COMMENT_RE.split(line)[0].strip()

        m = _POSTING_RE.match(line)
        if m:
            account, amount = m.groups()
            amount_str = amount.strip().replace(",", "").replace("$", "")
            try:
                amount_num = float(amount_str)
            except ValueError:
                # Best effort: the pattern admits malformed numbers such as
                # "1.2.3"; keep the row (amount_raw preserves the text) and
                # treat its value as zero.
                amount_num = 0.0
            # The first positive posting names the parent budget.
            if current_parent is None and amount_num > 0:
                current_parent = account.split(":")[0]
            current_block.append({
                "fy": fy,
                "frequency": current_freq,
                "parent_budget": current_parent,
                "account": account.strip(),
                "amount_raw": amount.strip(),
                "amount_num": amount_num,
                "is_computed_unallocated": False,
            })
        elif line:
            # Line without a parsable amount, e.g. the final balancing
            # "Unallocated" posting whose amount is left out.
            current_block.append({
                "fy": fy,
                "frequency": current_freq,
                "parent_budget": current_parent,
                "account": line.strip(),
                "amount_raw": None,
                "amount_num": 0.0,
                "is_computed_unallocated": False,
            })

    flush_block()  # end of file
    return entries
# --- Parse all FY ledgers ---
# Walk every "fy*/ledger" directory under LEDGER_ROOT, parse each *.ldg file
# and collect the budget rows into one DataFrame.
rows = []
# sorted(): glob yields entries in filesystem order, which is not stable
# across machines; sorting keeps row order and log output reproducible.
for fy_dir in sorted(LEDGER_ROOT.glob("fy*/ledger")):
    # The fiscal-year label is the upper-cased name of the directory that
    # contains "ledger".
    fy = fy_dir.parent.name.upper()
    for ldg in sorted(fy_dir.glob("*.ldg")):
        # errors="ignore" drops undecodable bytes instead of failing the run.
        txt = ldg.read_text(encoding="utf-8", errors="ignore")
        parsed = parse_budget_sections(txt, fy)
        if parsed:
            print(f"[INFO] {fy}: parsed {len(parsed)} lines from {ldg.name}")
            rows.extend(parsed)
        else:
            print(f"[WARN] {fy}: no budget sections found in {ldg.name}")

budget_df = pd.DataFrame(rows)
if not budget_df.empty:
    # Category is the top-level account segment (text before the first ':').
    budget_df["category"] = budget_df["account"].str.split(":", n=1).str[0]
    print(f"[INFO] Parsed total {len(budget_df)} rows.")
    # NOTE(review): display() is not defined in this file; presumably the
    # IPython/Jupyter builtin, since the file starts with a "# %%" cell marker.
    display(budget_df.head(10))
else:
    print("[WARN] No budget entries found.")