#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
mojofmt: Formatter for Mojolicious Embedded Perl templates (.ep, .htm.ep, .html.ep)

Features:
- Indent HTML structure and Mojolicious line directives consistently
- Preserve chomp markers (<%- ... -%>) and do not alter newline semantics
- Handle helper begin/end blocks and Perl brace-based indentation for directives
- Treat pre/script/style/textarea content as opaque (unchanged)
- Optionally normalize spacing inside <% %> delimiters and after % directives
- Integrate with perltidy for Perl code formatting (if available on PATH)
- Reformat extended multi-line Perl blocks between lines with only <% and %>
- CLI with --write/--check/--diff, --out, --stdin/--stdout modes
- --self-test for sanity checks (includes perltidy probe)
- Logging: --log-level error|info|debug (and --verbose as shorthand for info)
- Optional --perl-keyword-spacing to aggressively insert spaces after Perl keywords
"""

import argparse
import difflib
import logging
import os
import re
import shutil
import subprocess
import sys
from dataclasses import dataclass, replace as dc_replace
from pathlib import Path
from typing import Iterable, List, Optional, Set, Tuple

VERSION = "0.1.9"

DEFAULT_EXTENSIONS = (".ep", ".htm.ep", ".html.ep")

# HTML elements that never have a closing tag (no indent change).
VOID_ELEMENTS = {
    "area", "base", "br", "col", "embed", "hr", "img", "input",
    "link", "meta", "param", "source", "track", "wbr",
}
# HTML elements whose content is passed through untouched.
RAW_ELEMENTS = {"pre", "script", "style", "textarea"}

logger = logging.getLogger("mojofmt")

# A single HTML start/end tag.
TAG_RE = re.compile(
    r"""
    <
    (?P<slash>/)?
    (?P<name>[A-Za-z][\w:-]*)
    (?P<attrs>(?:\s+[^<>]*?)?)
    (?P<self>/)?
    >
    """,
    re.VERBOSE,
)

# Mojolicious inline tags on a single line: <%...%>
TPL_TAG_RE = re.compile(
    r"""
    <%
    (?P<leftchomp>-)?        # optional left chomp
    (?P<kind>==|=|\#)?       # kind: ==, =, or #
    (?P<body>.*?)            # inner code/comment (non-greedy, no newlines)
    (?P<rightchomp>-)?       # optional right chomp
    %>
    """,
    re.VERBOSE,
)

# Line directives: starts with % (possibly %= %== %#) after indentation.
# "%>" is the closing delimiter of an extended block and "%%" is an escaped
# percent line; neither is a directive, so both are excluded here.
LINE_DIR_RE = re.compile(r"^(?P<indent>\s*)%(?![>%])(?P<kind>==|=|\#)?(?P<body>.*)$")

# Whitespace condensing for single-line normalization
WS_RE = re.compile(r"[ \t]+")

# begin/end detection (heuristic)
BEGIN_RE = re.compile(r"\bbegin\b")
END_LINE_RE = re.compile(r"^\s*%\s*end\b")
END_TAG_ONLY_RE = re.compile(r"^\s*<%-?\s*end\s*-?%>\s*$")

# leading } in a directive (e.g., % } or % }} )
LEADING_RBRACE_COUNT_RE = re.compile(r"^\s*%\s*(?P<braces>\}+)")
# <% } %> alone
TAG_CLOSING_BRACE_ONLY_RE = re.compile(r"^\s*<%-?\s*(?P<braces>\}+)\s*-?%>\s*$")

# Detect raw element opening/closing (as standalone lines)
RAW_OPEN_RE = re.compile(r"^\s*<(?P<name>pre|script|style|textarea)\b[^>]*>\s*$", re.I)
RAW_CLOSE_RE = re.compile(r"^\s*</(?P<name>pre|script|style|textarea)\s*>\s*$", re.I)

# Extended EP block delimiters (opening/closing on their own lines)
OPEN_BLOCK_RE = re.compile(r'^(?P<base>[ \t]*)<%(?P<left>-?)(?![=#])\s*$')
CLOSE_BLOCK_RE = re.compile(r'^(?P<base>[ \t]*)(?P<right>-?)%>\s*$')

# Patterns used by enforce_perl_keyword_spacing (compiled once at import).
# Add space after control keywords before '('
_KW_CTRL_PAREN = re.compile(r"\b(?P<kw>if|elsif|unless|while|until|for|foreach|given|when)\s*\(")
# Add space after declarators before sigils/paren
_KW_DECL = re.compile(r"\b(?P<kw>my|our|state|local)\s*(?=[\$\@\%\*\&\\\(])")
# sub name spacing and brace spacing.  Whitespace after "sub" is mandatory so
# that identifiers such as "substr" are never split into "sub str".
_KW_SUB_NAMED = re.compile(r"\bsub\s+([A-Za-z_]\w*)")
_KW_SUB_NAMED_BRACE = re.compile(r"\bsub\s+([A-Za-z_]\w*)\s*\{")
_KW_SUB_ANON = re.compile(r"\bsub\s*\{")
# Calls which often appear without space.  The look-behind skips variables
# ($print) and method calls (->print); the trailing \b keeps identifiers such
# as "warnings" intact; the look-ahead avoids "return ;" / "$h{exit }".
_KW_CALL_PAREN = re.compile(r"(?<![\$\@\%\&>])\b(?P<kw>return|print|say|die|warn|exit)\s*\(")
_KW_CALL_SPACE = re.compile(
    r"(?<![\$\@\%\&>])\b(?P<kw>return|print|say|die|warn|exit)\b\s*(?=[^\s;,)}\]])"
)
# else/continue/do/eval blocks
_KW_ELSE_BRACE = re.compile(r"\b(?P<kw>else|continue|do|eval)\s*\{")
# Ensure space before a brace after a closing paren: "){" -> ") {"
_KW_BRACE_AFTER_PAREN = re.compile(r"\)\s*\{")
# Ensure space between '}' and a following keyword: "}else" -> "} else"
_KW_BRACE_THEN_KW = re.compile(r"\}\s*(?=\b(?:else|elsif|continue|when)\b)")


@dataclass
class Config:
    """Formatter settings, normally built from CLI arguments by load_config()."""

    indent_width: int = 2
    eol: str = "lf"  # lf|crlf|preserve
    normalize_delimiter_spacing: bool = True
    perltidy_path: Optional[str] = None  # if None, use PATH
    perltidy_options: Optional[List[str]] = None
    extensions: Tuple[str, ...] = DEFAULT_EXTENSIONS
    respect_gitignore: bool = True
    verbose: bool = False  # kept for shorthand with --verbose
    perl_keyword_spacing: bool = False  # optional post-pass


def load_config(cli_args: argparse.Namespace) -> Config:
    """Build a Config from parsed command-line arguments."""
    cfg = Config()
    if cli_args.indent is not None:
        cfg.indent_width = cli_args.indent
    if cli_args.eol is not None:
        cfg.eol = cli_args.eol
    if cli_args.no_space_in_delims:
        cfg.normalize_delimiter_spacing = False
    if cli_args.perltidy:
        cfg.perltidy_path = cli_args.perltidy
    cfg.verbose = cli_args.verbose
    cfg.perl_keyword_spacing = getattr(cli_args, "perl_keyword_spacing", False)
    return cfg


def setup_logging(level_name: Optional[str], verbose_flag: bool) -> None:
    """Configure root logging; an explicit level wins over --verbose; default is error."""
    if level_name:
        name = level_name.lower()
    elif verbose_flag:
        name = "info"
    else:
        name = "error"
    level = {
        "error": logging.ERROR,
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
        "critical": logging.CRITICAL,
    }.get(name, logging.ERROR)
    fmt = "mojofmt: %(levelname)s: %(message)s"
    logging.basicConfig(level=level, format=fmt)


def detect_eol(text: str) -> str:
    """Return "crlf" if the text contains any CRLF sequence, otherwise "lf"."""
    return "crlf" if "\r\n" in text else "lf"


def normalize_eol(text: str, eol: str) -> str:
    """Convert all line endings to the requested style ("preserve" returns text as is)."""
    if eol == "preserve":
        return text
    unified = text.replace("\r\n", "\n").replace("\r", "\n")
    if eol == "crlf":
        return unified.replace("\n", "\r\n")
    # "lf" and any unknown value both yield LF-only text.
    return unified


_PERLTIDY_WARNED = False  # avoid spamming logs if perltidy missing repeatedly


def run_perltidy(code: str, cfg: Config) -> Tuple[int, str, str]:
    """
    Pipe `code` through perltidy.

    Returns (returncode, stdout, stderr).  When perltidy cannot be found the
    result is (127, code, message) so callers can fall back to the input.
    """
    global _PERLTIDY_WARNED
    exe = cfg.perltidy_path or shutil.which("perltidy")
    if not exe:
        if not _PERLTIDY_WARNED:
            logger.error("perltidy not found; Perl inside template will not be reformatted")
            _PERLTIDY_WARNED = True
        return (127, code, "perltidy not found")

    args: List[str] = [exe]
    if cfg.perltidy_options:
        args += cfg.perltidy_options
        # Output must go to stdout for us to capture it.
        if not any(opt.startswith("-st") for opt in cfg.perltidy_options):
            args.append("-st")
    else:
        args += [
            f"-i={cfg.indent_width}",
            f"-ci={cfg.indent_width}",
            "-l=100",
            "-q",
            "-se",
            "-st",
            "-nbbc",
            "-noll",
        ]

    logger.debug("Running perltidy: %s", " ".join(args))
    try:
        proc = subprocess.run(
            args,
            input=code,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        if not _PERLTIDY_WARNED:
            logger.error("perltidy not found while executing")
            _PERLTIDY_WARNED = True
        return (127, code, "perltidy not found")

    if proc.returncode != 0:
        logger.debug("perltidy non-zero exit %s: %s", proc.returncode, (proc.stderr or "").strip())
    return (proc.returncode, proc.stdout, proc.stderr)


def perltidy_probe(cfg: Config) -> Tuple[bool, str]:
    """Check that perltidy is present and reformats a small snippet; return (ok, message)."""
    exe = cfg.perltidy_path or shutil.which("perltidy")
    if not exe:
        return (False, "perltidy not found on PATH (install Perl::Tidy or pass --perltidy)")
    snippet = "my $x= {a=>1,b =>2 };"
    rc, out, err = run_perltidy(snippet, cfg)
    if rc != 0:
        return (False, f"perltidy exit {rc}: {(err or '').strip()}")
    want = ["my $x = {", "a => 1", "b => 2"]
    if all(w in out for w in want):
        return (True, f"perltidy OK: {exe}")
    if out and out.strip() and out.strip() != snippet:
        return (True, f"perltidy OK (non-default style): {exe}")
    return (False, "perltidy produced unexpected output")


def _extract_braced_inner(text: str) -> Optional[str]:
    """
    Return the text between the first "{" in `text` and its matching "}".

    Braces are counted textually (string literals are not parsed).  Returns
    None when there is no "{" or the braces never balance.
    """
    start = text.find("{")
    if start == -1:
        return None
    depth = 0
    for pos in range(start, len(text)):
        ch = text[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start + 1 : pos]
    return None


def tidy_perl_statement_oneline(code: str, cfg: Config) -> str:
    """Tidy a Perl statement and collapse the result onto a single line."""
    rc, out, _ = run_perltidy(code, cfg)
    if rc != 0:
        out = code
    out = " ".join(out.strip().splitlines())
    out = WS_RE.sub(" ", out).strip()
    return enforce_perl_keyword_spacing(out, cfg.perl_keyword_spacing)


def tidy_perl_expression(code: str, cfg: Config) -> str:
    """
    Tidy a Perl expression (as used in <%= ... %>) onto a single line.

    The expression is wrapped in `do { ... }` so perltidy sees a complete
    statement; the wrapper is stripped again afterwards.  Falls back to the
    stripped input when perltidy fails or its output cannot be unwrapped.
    """
    rc, out, _ = run_perltidy(f"do {{ {code} }}", cfg)
    if rc != 0:
        return enforce_perl_keyword_spacing(code.strip(), cfg.perl_keyword_spacing)

    inner = _extract_braced_inner(out)
    if inner is None:
        inner = code.strip()
    inner = " ".join(line.strip() for line in inner.splitlines())
    inner = WS_RE.sub(" ", inner).strip()
    return enforce_perl_keyword_spacing(inner, cfg.perl_keyword_spacing)


def tidy_perl_block_multiline(code: str, cfg: Config) -> Optional[str]:
    """
    Format a multi-line chunk of Perl by wrapping it in a do { ... } block for perltidy.
    Returns the formatted inner text (without the wrapper) or None on failure.
    """
    rc, out, _ = run_perltidy("do {\n" + code + "\n}", cfg)
    if rc != 0 or not out:
        return None
    inner = _extract_braced_inner(out)
    if inner is None:
        return None
    # Drop the single newline that follows "{" and the one that precedes "}".
    if inner.startswith("\n"):
        inner = inner[1:]
    if inner.endswith("\n"):
        inner = inner[:-1]
    return inner


def _split_code_and_strings(s: str) -> List[Tuple[str, str]]:
    """
    Split `s` into ("code", text) and ("str", text) chunks.

    A "str" chunk is a single- or double-quoted literal including its quotes;
    a backslash escapes the following character.  An unterminated literal is
    returned as a trailing "str" chunk.  Concatenating all chunk texts gives
    back `s`.
    """
    chunks: List[Tuple[str, str]] = []
    buf: List[str] = []
    quote: Optional[str] = None  # the active quote character, if inside a literal
    i = 0
    while i < len(s):
        ch = s[i]
        if quote is None:
            if ch in ("'", '"'):
                if buf:
                    chunks.append(("code", "".join(buf)))
                    buf = []
                quote = ch
            buf.append(ch)
        else:
            buf.append(ch)
            if ch == "\\":
                # Keep the escaped character with the backslash.
                if i + 1 < len(s):
                    buf.append(s[i + 1])
                    i += 1
            elif ch == quote:
                chunks.append(("str", "".join(buf)))
                buf = []
                quote = None
        i += 1
    if buf:
        chunks.append(("code" if quote is None else "str", "".join(buf)))
    return chunks


def _split_unquoted_comment(code_chunk: str) -> Tuple[str, Optional[str]]:
    """Split a code chunk at its first "#"; return (code, comment or None)."""
    idx = code_chunk.find("#")
    if idx == -1:
        return code_chunk, None
    return code_chunk[:idx], code_chunk[idx:]


def enforce_perl_keyword_spacing(s: str, enable: bool) -> str:
    """
    Insert conventional spaces around Perl keywords ("if(" -> "if (", "){" -> ") {").

    Only text outside quoted strings and before a "#" comment is touched.
    Returns `s` unchanged when `enable` is false or `s` is empty.
    """
    if not enable or not s:
        return s

    out: List[str] = []
    for kind, chunk in _split_code_and_strings(s):
        if kind != "code":
            out.append(chunk)
            continue
        code, comment = _split_unquoted_comment(chunk)
        code = _KW_CTRL_PAREN.sub(lambda m: f"{m.group('kw')} (", code)
        code = _KW_DECL.sub(lambda m: f"{m.group('kw')} ", code)
        code = _KW_SUB_NAMED.sub(lambda m: f"sub {m.group(1)}", code)
        code = _KW_SUB_NAMED_BRACE.sub(lambda m: f"sub {m.group(1)} {{", code)
        code = _KW_SUB_ANON.sub("sub {", code)
        code = _KW_CALL_PAREN.sub(lambda m: f"{m.group('kw')} (", code)
        code = _KW_CALL_SPACE.sub(lambda m: f"{m.group('kw')} ", code)
        code = _KW_BRACE_THEN_KW.sub("} ", code)
        code = _KW_ELSE_BRACE.sub(lambda m: f"{m.group('kw')} {{", code)
        code = _KW_BRACE_AFTER_PAREN.sub(") {", code)
        out.append(code + (comment or ""))
    return "".join(out)


def _common_leading_ws(lines: List[str]) -> str:
    """Return the longest space/tab prefix shared by all non-blank lines."""
    prefixes = [ln[: len(ln) - len(ln.lstrip(" \t"))] for ln in lines if ln.strip()]
    # commonprefix compares character by character, which is what we need here.
    return os.path.commonprefix(prefixes) if prefixes else ""


def _dedent_block(text: str) -> str:
    """Drop blank leading/trailing lines and remove the common leading whitespace."""
    lines = text.splitlines()
    # Trim leading/trailing all-whitespace lines
    while lines and not lines[0].strip():
        lines.pop(0)
    while lines and not lines[-1].strip():
        lines.pop()
    if not lines:
        return ""
    prefix = _common_leading_ws(lines)
    if not prefix:
        return "\n".join(lines)
    plen = len(prefix)
    return "\n".join(ln[plen:] if ln.startswith(prefix) else ln for ln in lines)


def _naive_perl_indent(code: str, width: int = 2) -> str:
    """
    Re-indent Perl code by counting braces (fallback when perltidy is unavailable).

    Each line is indented by the current brace depth; lines starting with "}"
    are dedented first.  Braces inside strings/comments are not distinguished.
    """
    indent = 0
    out: List[str] = []
    for raw in code.splitlines():
        ln = raw.rstrip()
        if not ln:
            out.append("")
            continue
        stripped = ln.lstrip()
        # dedent on leading closing braces
        leading_closes = len(stripped) - len(stripped.lstrip("}"))
        indent_before = max(0, indent - leading_closes)
        out.append((" " * (indent_before * width)) + stripped)
        indent = max(0, indent + ln.count("{") - ln.count("}"))
    return "\n".join(out)


def normalize_tpl_tag(
    leftchomp: Optional[str],
    kind: Optional[str],
    body: str,
    rightchomp: Optional[str],
    cfg: Config,
) -> Tuple[str, str, str, str, str]:
    """
    Split an inline tag into (open, leftchomp, kind, body, close) output parts.

    Concatenating the five parts yields the tag text.  With delimiter
    normalization enabled (and for non-comment tags) the chomp marker and kind
    are folded into `open`, the body is stripped, and a single space follows
    the opener; a space precedes "%>" only when there is no right chomp marker.
    Otherwise the pieces are returned verbatim.
    """
    if not cfg.normalize_delimiter_spacing or (kind == "#"):
        return ("<%", leftchomp or "", kind or "", body, (rightchomp or "") + "%>")

    body = body.strip()
    # None and "" both mean "no right chomp marker".
    right_space = "" if rightchomp else " "
    open_part = "<%" + (leftchomp or "") + (kind or "") + " "
    close_part = right_space + (rightchomp or "") + "%>"
    return (open_part, "", "", body, close_part)


def substitute_tpl_tags_in_line(line: str, cfg: Config) -> str:
    """Reformat every inline <% ... %> tag in `line`; text between tags is kept as is."""
    parts: List[str] = []
    last = 0
    for m in TPL_TAG_RE.finditer(line):
        parts.append(line[last : m.start()])
        kind = m.group("kind") or ""
        body = m.group("body")
        open_part, left_part, kind_part, _, close_part = normalize_tpl_tag(
            m.group("leftchomp") or "", kind, body, m.group("rightchomp") or "", cfg
        )
        if kind == "#":
            # Comments are never reformatted.
            inner = body
        else:
            if kind in ("=", "=="):
                inner = tidy_perl_expression(body, cfg)
            else:
                inner = tidy_perl_statement_oneline(body, cfg)
            if not cfg.normalize_delimiter_spacing:
                # Spacing inside the delimiters must stay as the author wrote it.
                core = body.strip()
                if core:
                    start = body.find(core)
                    inner = body[:start] + inner + body[start + len(core):]
                else:
                    inner = body
        # left_part/kind_part are empty when normalization folded them into
        # open_part; otherwise they carry the chomp marker and "=", "==" or "#".
        parts.append(open_part + left_part + kind_part + inner + close_part)
        last = m.end()
    parts.append(line[last:])
    return "".join(parts)


def derive_html_tag_deltas(line_wo_tpl: str) -> Tuple[int, int, Optional[str], Optional[str]]:
    """
    Return (pre_dedent, net_total, raw_open, raw_close):
    - pre_dedent: end tags at beginning of line (dedent before printing)
    - net_total: total start tags (+1) minus end tags (-1) across the line for non-void, non-self-closing tags
    - raw_open, raw_close: raw elements opened/closed on this line if they match exactly
    """
    s = line_wo_tpl

    m_open = RAW_OPEN_RE.match(s)
    raw_open = m_open.group("name").lower() if m_open else None
    m_close = RAW_CLOSE_RE.match(s)
    raw_close = m_close.group("name").lower() if m_close else None

    # Count the run of closing tags at the very start of the line.
    pre_dedent = 0
    i = len(s) - len(s.lstrip())
    while True:
        m = TAG_RE.match(s, i)
        if not m or not m.group("slash"):
            break
        pre_dedent += 1
        i = m.end()
        while i < len(s) and s[i].isspace():
            i += 1

    net = 0
    for m in TAG_RE.finditer(s):
        if m.group("slash"):
            net -= 1
        elif not (m.group("self") or (m.group("name") or "").lower() in VOID_ELEMENTS):
            net += 1

    return pre_dedent, net, raw_open, raw_close


def strip_tpl_tags(line: str) -> str:
    """Blank out inline template tags with spaces so HTML parsing ignores them."""
    return TPL_TAG_RE.sub(lambda m: " " * (m.end() - m.start()), line)


def is_standalone_statement_tag(line: str) -> bool:
    """True if the stripped line starts with "<%" (but not "<%=") and ends with "%>"."""
    s = line.strip()
    if not (s.startswith("<%") and s.endswith("%>")):
        return False
    # "<%=" also covers "<%==".
    return not s.startswith("<%=")


def compute_perl_deltas(line: str) -> Tuple[int, int]:
    """
    Return (perl_dedent_before, perl_delta_after_for_next_line).

    Only line directives (starting with %) and standalone <% ... %> statement
    lines affect Perl depth.  Also account for % end / <% end %> and begin
    blocks.  Comment directives/tags never change the depth.

    `perl_delta_after_for_next_line` is relative to the depth *after* the
    pre-dedent has been applied, so "% }" yields (1, 0) and "% } else {"
    yields (1, 1).
    """
    dedent_before = 0
    if END_LINE_RE.match(line) or END_TAG_ONLY_RE.match(line):
        dedent_before += 1

    # Closing braces that dedent the line itself.
    brace_dedent = 0
    m = LEADING_RBRACE_COUNT_RE.match(line)
    if m:
        brace_dedent += len(m.group("braces"))
    m = TAG_CLOSING_BRACE_ONLY_RE.match(line)
    if m:
        brace_dedent += len(m.group("braces"))
    dedent_before += brace_dedent

    delta_after = 0
    dir_match = LINE_DIR_RE.match(line)
    if dir_match:
        if dir_match.group("kind") != "#":
            body = dir_match.group("body")
            delta_after += body.count("{") - body.count("}")
            if BEGIN_RE.search(line):
                delta_after += 1
    elif is_standalone_statement_tag(line):
        bodies = [
            t.group("body") or ""
            for t in TPL_TAG_RE.finditer(line)
            if t.group("kind") != "#"
        ]
        if bodies:
            delta_after += sum(b.count("{") - b.count("}") for b in bodies)
            if BEGIN_RE.search(line):
                delta_after += 1

    # The leading braces were already consumed by the pre-dedent; add them
    # back so they are not subtracted a second time.
    delta_after += brace_dedent
    return dedent_before, delta_after


def format_line_directive(line: str, cfg: Config) -> Optional[str]:
    """
    If the line is a Mojolicious line directive (% ...), return a formatted directive string
    WITHOUT leading indentation (indent applied separately). Otherwise return None.
    """
    m = LINE_DIR_RE.match(line)
    if not m:
        return None
    kind = m.group("kind") or ""
    body = m.group("body")

    if kind == "#":
        if cfg.normalize_delimiter_spacing:
            trimmed = body.strip()
            return "%#" + ((" " + trimmed) if trimmed else "")
        return "%#" + body

    if kind in ("=", "=="):
        inner = tidy_perl_expression(body, cfg)
    else:
        inner = tidy_perl_statement_oneline(body, cfg)

    # Code directives always get exactly one space after the marker.
    return "%" + kind + ((" " + inner) if inner else "")


def rstrip_trailing_ws(line: str) -> str:
    """Remove trailing spaces and tabs."""
    return line.rstrip(" \t")


def _extended_block_interiors(lines: List[str]) -> Set[int]:
    """
    Return the indexes of lines lying strictly between a lone "<%" line and the
    next lone "%>" line.  Unclosed openers contribute nothing.  The pairing rule
    mirrors format_extended_perl_blocks.
    """
    protected: Set[int] = set()
    i, n = 0, len(lines)
    while i < n:
        if OPEN_BLOCK_RE.match(lines[i]):
            j = next((k for k in range(i + 1, n) if CLOSE_BLOCK_RE.match(lines[k])), None)
            if j is not None:
                protected.update(range(i + 1, j))
                i = j + 1
                continue
        i += 1
    return protected


def format_extended_perl_blocks(text: str, cfg: Config) -> str:
    """
    Detect blocks where <% and %> are on their own lines (with optional chomp markers),
    format the inner Perl with perltidy (wrapped in do { ... }) or a naive indenter,
    and reinsert with the original base indentation.
    """
    lines = text.splitlines()
    out: List[str] = []
    i, n = 0, len(lines)
    while i < n:
        m_open = OPEN_BLOCK_RE.match(lines[i])
        if not m_open:
            out.append(lines[i])
            i += 1
            continue

        # Find closing delimiter
        j = i + 1
        close = None
        while j < n:
            close = CLOSE_BLOCK_RE.match(lines[j])
            if close:
                break
            j += 1
        if close is None:
            # Unclosed opener: leave the line alone.
            out.append(lines[i])
            i += 1
            continue

        base = m_open.group("base") or ""
        left = m_open.group("left") or ""
        right = close.group("right") or ""

        # Dedent before formatting
        inner = _dedent_block("\n".join(lines[i + 1 : j]))

        # Try perltidy; fallback to naive indentation
        tidied = tidy_perl_block_multiline(inner, cfg)
        if tidied is None:
            logger.debug("EP block %d-%d: perltidy failed/unavailable; using naive indenter", i + 1, j + 1)
            tidied = _naive_perl_indent(inner, width=cfg.indent_width)
        else:
            logger.debug("EP block %d-%d: perltidy formatted (%d lines)", i + 1, j + 1, len(tidied.splitlines()))

        tidied = tidied.rstrip("\n")
        out.append(f"{base}<%{left}")
        # Blank lines stay empty so no trailing whitespace is emitted.
        out.extend((base + ln) if ln else "" for ln in tidied.splitlines())
        out.append(f"{base}{right}%>")
        i = j + 1  # continue after closing line

    return "\n".join(out) + ("\n" if text.endswith("\n") else "")


def format_string(src: str, cfg: Config) -> str:
    """
    Format a complete template and return the result.

    Lines are indented by HTML depth plus Perl depth; raw element content and
    the Perl inside extended <% ... %> blocks are not parsed as template lines
    (the latter is formatted by format_extended_perl_blocks afterwards).  The
    output always ends with a newline and uses the EOL style from `cfg`.
    """
    original_eol = detect_eol(src)
    text = src.replace("\r\n", "\n").replace("\r", "\n")
    lines = text.split("\n")

    # Perl code inside extended blocks must not be mistaken for HTML/directives.
    protected = _extended_block_interiors(lines)

    html_depth = 0
    perl_depth = 0
    in_raw: Optional[str] = None
    out_lines: List[str] = []

    for idx, line in enumerate(lines):
        if in_raw:
            m_close = RAW_CLOSE_RE.match(line)
            if m_close and m_close.group("name").lower() == in_raw:
                indent_level = max(0, html_depth - 1) + perl_depth
                indent = " " * (cfg.indent_width * indent_level)
                out_lines.append(rstrip_trailing_ws(indent + line.lstrip()))
                html_depth = max(0, html_depth - 1)
                in_raw = None
            else:
                out_lines.append(line)
            continue

        if idx in protected:
            out_lines.append(line)
            continue

        perl_dedent_before, perl_delta_after = compute_perl_deltas(line)
        html_pre_dedent, html_net, raw_open, _raw_close = derive_html_tag_deltas(strip_tpl_tags(line))

        base_html_depth = max(0, html_depth - html_pre_dedent)
        base_perl_depth = max(0, perl_depth - perl_dedent_before)
        indent = " " * (cfg.indent_width * max(0, base_html_depth + base_perl_depth))

        content = format_line_directive(line, cfg)
        if content is None:
            content = substitute_tpl_tags_in_line(line, cfg)
        out_lines.append(rstrip_trailing_ws(indent + content.lstrip()))

        # html_net already includes the leading end tags, so add them back.
        html_depth = max(0, base_html_depth + html_net + html_pre_dedent)
        if raw_open and (raw_open.lower() in RAW_ELEMENTS):
            in_raw = raw_open.lower()
        perl_depth = max(0, base_perl_depth + perl_delta_after)

    result = "\n".join(out_lines)

    # Post-pass: format extended <% ... %> blocks
    result = format_extended_perl_blocks(result, cfg)

    if not result.endswith("\n"):
        result += "\n"

    eol_mode = cfg.eol if cfg.eol != "preserve" else original_eol
    return normalize_eol(result, eol_mode)


def read_text(path: Path) -> str:
    """Read a file as UTF-8, replacing undecodable bytes instead of failing."""
    with path.open("rb") as f:
        raw = f.read()
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode(errors="replace")


def write_text(path: Path, text: str) -> None:
    """Write text as UTF-8 bytes (no newline translation)."""
    with path.open("wb") as f:
        f.write(text.encode("utf-8"))


def is_supported_file(path: Path, exts: Tuple[str, ...]) -> bool:
    """True if the (case-insensitive) file name ends with one of `exts`."""
    return path.name.lower().endswith(tuple(exts))


def iter_files(paths: List[str], exts: Tuple[str, ...]) -> Iterable[Path]:
    """Yield supported files from the given files and (recursively) directories."""
    for p in paths:
        pth = Path(p)
        if pth.is_dir():
            for root, _, files in os.walk(pth):
                for fn in files:
                    fp = Path(root) / fn
                    if is_supported_file(fp, exts):
                        logger.debug("Found file: %s", fp)
                        yield fp
        elif is_supported_file(pth, exts):
            logger.debug("Found file: %s", pth)
            yield pth


def unified_diff(a: str, b: str, path: Path) -> str:
    """Return a unified diff between original `a` and formatted `b`."""
    return "".join(
        difflib.unified_diff(
            a.splitlines(keepends=True),
            b.splitlines(keepends=True),
            fromfile=str(path),
            tofile=str(path) + " (formatted)",
        )
    )


def process_file(path: Path, cfg: Config, write: bool, show_diff: bool, backup: bool = False) -> Tuple[bool, str]:
    """
    Format one file; optionally print a diff and/or overwrite it.

    Returns (changed, formatted_text).  With `backup`, the original content is
    saved next to the file with a ".bak" suffix before overwriting.
    """
    original = read_text(path)
    formatted = format_string(original, cfg)
    changed = original != formatted
    if not changed:
        logger.info("Unchanged: %s", path)
        return changed, formatted

    logger.info("Formatted: %s", path)
    if show_diff:
        sys.stdout.write(unified_diff(original, formatted, path))
    if write:
        if backup:
            bak_path = path.with_name(path.name + ".bak")
            write_text(bak_path, original)
            logger.info("Backup written: %s", bak_path)
        write_text(path, formatted)
        logger.info("Overwritten: %s", path)
    return changed, formatted


def process_stdin_stdout(cfg: Config) -> int:
    """Format stdin to stdout; returns the process exit code (0)."""
    data = sys.stdin.read()
    sys.stdout.write(format_string(data, cfg))
    logger.info("Formatted stdin to stdout")
    return 0


def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line argument parser."""
    p = argparse.ArgumentParser(description="Format Mojolicious templates (.ep, .htm.ep, .html.ep)")
    p.add_argument("paths", nargs="*", help="Files or directories")
    p.add_argument("-w", "--write", action="store_true", help="Overwrite files in place (writes a .bak backup)")
    p.add_argument("-o", "--out", help="Write formatted output to this file (single input file or --stdin). Conflicts with --write/--check/--diff")
    p.add_argument("--check", action="store_true", help="Exit non-zero if any file would change")
    p.add_argument("--diff", action="store_true", help="Print unified diff for changes")
    p.add_argument("--stdin", action="store_true", help="Read from stdin")
    p.add_argument("--stdout", action="store_true", help="Write to stdout (with --stdin)")
    p.add_argument("--perltidy", help="Path to perltidy executable (defaults to PATH)")
    p.add_argument("--indent", type=int, help="Indent width (spaces, default 2)")
    p.add_argument("--eol", choices=["lf", "crlf", "preserve"], default="lf", help="EOL handling (default lf)")
    p.add_argument("--no-space-in-delims", action="store_true", help="Do not normalize spaces inside <%% %%> delimiters")
    p.add_argument("--perl-keyword-spacing", action="store_true", help="Aggressively insert a space after Perl keywords (if(...)->if (...), my$->my $, return(...)->return (...), etc.)")
    p.add_argument("--self-test", dest="self_test", action="store_true", help="Run internal sanity checks and exit 0/1")
    p.add_argument("--log-level", choices=["error", "info", "debug"], help="Logging level (default error)")
    p.add_argument("--verbose", action="store_true", help="Shorthand for --log-level info")
    p.add_argument("--version", action="store_true", help="Print version and exit")
    return p


def self_test(cfg: Config) -> int:
    """Run built-in sanity checks; return 0 when all pass, 1 otherwise."""
    failures: List[str] = []

    def check(name: str, cond: bool, detail: Optional[str] = None):
        if not cond:
            failures.append(name + (": " + detail if detail else ""))

    # T0: perltidy availability and behavior
    ok, msg = perltidy_probe(cfg)
    if not ok:
        failures.append("perltidy: " + msg)
    else:
        logger.info(msg)

    # T1: idempotence on a mixed template
    src_a = "% if (1) {\n<ul>\n% for my $i (1..2) {\n<li><%= $i %></li>\n% }\n</ul>\n% }\n"
    fmt_a1 = format_string(src_a, cfg)
    fmt_a2 = format_string(fmt_a1, cfg)
    check("idempotence", fmt_a1 == fmt_a2)

    # T2: chomp markers preserved
    src_b = "<li><%= $title -%>\n<%= $sub %></li>\n"
    fmt_b = format_string(src_b, cfg)
    check("chomp presence", "-%>" in fmt_b)
    check("no-left-chomp-added", "<%-" not in fmt_b)

    # T3: raw element inner content unchanged
    src_c = "<script>\n  var x = 1;\n      if (x) { y(); }\n</script>\n"
    fmt_c = format_string(src_c, cfg)
    c_lines = src_c.splitlines()
    f_lines = fmt_c.splitlines()
    if len(c_lines) >= 3 and len(f_lines) >= 3:
        check("raw inner unchanged", c_lines[1:-1] == f_lines[1:-1], detail=f"got {f_lines[1:-1]!r}")
    else:
        check("raw structure", False, "unexpected line count")

    # T4: delimiter spacing normalization for <% %>
    src_d = "<%my $x=1;%>\n"
    fmt_d = format_string(src_d, cfg)
    check("delimiter spacing", "<% " in fmt_d and "%>" in fmt_d)

    # T5: keyword spacing with flag on
    cfg_kw = dc_replace(cfg, perl_keyword_spacing=True)
    fmt_k1 = format_string("<% if($x){ %>\n", cfg_kw)
    check("kw if(...)", "if (" in fmt_k1 and " {" in fmt_k1)
    fmt_k2 = format_string("<%= return(1) %>\n", cfg_kw)
    check("kw return(...)", "return (" in fmt_k2)
    fmt_k3 = format_string('<% say"hi"; %>\n', cfg_kw)
    check("kw say \"...\"", 'say "' in fmt_k3)
    fmt_k4 = format_string("<% my($x,$y)=@_; %>\n", cfg_kw)
    check("kw my $", "my (" in fmt_k4 and " = @_" in fmt_k4)
    fmt_k5 = format_string("<% sub foo{ %>\n", cfg_kw)
    check("kw sub foo {", "sub foo {" in fmt_k5)

    # T6: extended EP block formatting
    src_e = "<%\nmy $x=1;\nif($x){\nsay \"hi\";\n}\n%>\n"
    fmt_e = format_string(src_e, cfg)
    check("extended block indented", ("if (" in fmt_e and "say" in fmt_e and "{\n" in fmt_e) or ("if(" not in fmt_e))

    if failures:
        logger.error("SELF-TEST FAILURES:")
        for f in failures:
            logger.error(" - %s", f)
        return 1
    logger.info("Self-test passed")
    return 0


def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point; returns the process exit code."""
    parser = build_arg_parser()
    args = parser.parse_args(argv)

    setup_logging(args.log_level, args.verbose)

    if args.version:
        print(f"mojofmt {VERSION}")
        return 0

    if args.self_test:
        return self_test(load_config(args))

    # Validate --out usage
    if args.out:
        if args.write or args.check or args.diff:
            parser.error("--out conflicts with --write/--check/--diff")
        cfg = load_config(args)
        out_path = Path(args.out)
        if args.stdin:
            data = sys.stdin.read()
            write_text(out_path, format_string(data, cfg))
            logger.info("Wrote %s (from stdin)", out_path)
            return 0
        # must be exactly one input file
        if not args.paths or len(args.paths) != 1:
            parser.error("--out requires exactly one input file (or use --stdin)")
        in_path = Path(args.paths[0])
        write_text(out_path, format_string(read_text(in_path), cfg))
        logger.info("Wrote %s (from %s)", out_path, in_path)
        return 0

    cfg = load_config(args)

    if args.stdin:
        return process_stdin_stdout(cfg)

    if not args.paths:
        parser.error("No input paths provided (or use --stdin).")

    any_changed = False
    any_error = False
    for path in iter_files(args.paths, cfg.extensions):
        try:
            changed, _ = process_file(path, cfg, write=args.write, show_diff=args.diff, backup=args.write)
            any_changed = any_changed or changed
        except Exception as e:  # top-level boundary: report and keep going
            any_error = True
            logger.error("Error processing %s: %s", path, e)

    if args.check and any_changed:
        return 1
    return 1 if any_error else 0


if __name__ == "__main__":
    sys.exit(main())