2025-08-09 15:52:03 +01:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
mojofmt: Formatter for Mojolicious Embedded Perl templates (.ep, .htm.ep, .html.ep)
Features:
- Indent HTML structure and Mojolicious line directives consistently
- Preserve chomp markers (<%- ... -%>) and do not alter newline semantics
- Handle helper begin/end blocks and Perl brace-based indentation for directives
- Treat pre/script/style/textarea content as opaque (unchanged)
- Optionally normalize spacing inside <% %> delimiters and after % directives
- Integrate with perltidy for Perl code formatting (if available on PATH)
- Reformat extended multi-line Perl blocks between lines with only <% and %>
- CLI with --write/--check/--diff, --out, --stdin/--stdout modes
- --self-test for sanity checks (includes perltidy probe)
- Logging: --log-level error|info|debug (and --verbose as shorthand for info)
- Optional --perl-keyword-spacing to aggressively insert spaces after Perl keywords
"""
from __future__ import annotations
import argparse
import difflib
import logging
import os
import re
import shutil
import subprocess
import sys
from dataclasses import dataclass , replace as dc_replace
from pathlib import Path
from typing import Iterable , List , Optional , Tuple
# Version string printed by --version.
VERSION = "0.1.9"

# File-name suffixes that identify a template worth formatting.
DEFAULT_EXTENSIONS = (".ep", ".htm.ep", ".html.ep")

# HTML elements that never take a closing tag; they must not deepen indentation.
VOID_ELEMENTS = {
    "area", "base", "br", "col", "embed", "hr", "img", "input",
    "link", "meta", "param", "source", "track", "wbr",
}

# Elements whose inner lines are copied through without re-indentation.
RAW_ELEMENTS = {"pre", "script", "style", "textarea"}

logger = logging.getLogger("mojofmt")
# A single HTML start/end tag: optional leading slash, tag name, attributes,
# optional self-closing slash.
TAG_RE = re.compile(
    r"""
    <
    (?P<slash>/)?
    (?P<name>[A-Za-z][\w:-]*)
    (?P<attrs>(?:\s+[^<>]*?)?)
    (?P<self>/)?
    >
    """,
    re.VERBOSE,
)

# Mojolicious inline tags on a single line: <%...%>
TPL_TAG_RE = re.compile(
    r"""
    <%
    (?P<leftchomp>-)?        # optional left chomp
    (?P<kind>==|=|\#)?       # kind: ==, =, or #
    (?P<body>.*?)            # inner code/comment (non-greedy, no newlines)
    (?P<rightchomp>-)?       # optional right chomp
    %>
    """,
    re.VERBOSE,
)

# Line directives: starts with % (possibly %= %== %#) after indentation
LINE_DIR_RE = re.compile(r"^(?P<indent>\s*)%(?P<kind>==|=|\#)?(?P<body>.*)$")

# Whitespace condensing for single-line normalization
WS_RE = re.compile(r"[ \t]+")

# begin/end detection (heuristic)
BEGIN_RE = re.compile(r"\bbegin\b")
END_LINE_RE = re.compile(r"^\s*%\s*end\b")
END_TAG_ONLY_RE = re.compile(r"^\s*<%-?\s*end\s*-?%>\s*$")

# leading } in a directive (e.g., % } or % }} )
LEADING_RBRACE_COUNT_RE = re.compile(r"^\s*%\s*(?P<braces>\}+)")

# <% } %> alone
TAG_CLOSING_BRACE_ONLY_RE = re.compile(r"^\s*<%-?\s*\}+\s*-?%>\s*$")

# Detect raw element opening/closing (as standalone lines)
RAW_OPEN_RE = re.compile(r"^\s*<(?P<name>pre|script|style|textarea)\b[^>]*>\s*$", re.I)
RAW_CLOSE_RE = re.compile(r"^\s*</(?P<name>pre|script|style|textarea)\s*>\s*$", re.I)

# Extended EP block delimiters (opening/closing on their own lines)
OPEN_BLOCK_RE = re.compile(r'^(?P<base>[ \t]*)<%(?P<left>-?)(?![=#])\s*$')
CLOSE_BLOCK_RE = re.compile(
    r'^(?P<base>[ \t]*)(?P<right>-?)%(?:>| >)\s*$'
)
@dataclass
class Config:
    """Formatter settings; defaults are overridden from the CLI by load_config."""

    # Number of spaces per indentation level.
    indent_width: int = 2
    # Line-ending policy: "lf", "crlf" or "preserve".
    eol: str = "lf"
    # Normalize spacing inside <% %> delimiters and after % directives.
    normalize_delimiter_spacing: bool = True
    # Explicit perltidy executable; None means "look it up on PATH".
    perltidy_path: Optional[str] = None
    # Extra perltidy arguments; when set they replace the built-in defaults.
    perltidy_options: Optional[List[str]] = None
    # File-name suffixes considered templates.
    extensions: Tuple[str, ...] = DEFAULT_EXTENSIONS
    # Not read anywhere in the visible code.
    respect_gitignore: bool = True
    # Kept for shorthand with --verbose.
    verbose: bool = False
    # Optional post-pass that inserts spaces after Perl keywords.
    perl_keyword_spacing: bool = False
def load_config(cli_args: argparse.Namespace) -> Config:
    """Build a Config from parsed command-line arguments.

    Only options the user actually supplied override the dataclass defaults;
    ``verbose`` and ``perl_keyword_spacing`` are always copied across.
    """
    overrides = {
        "verbose": cli_args.verbose,
        # getattr keeps this working for namespaces built without the flag.
        "perl_keyword_spacing": getattr(cli_args, "perl_keyword_spacing", False),
    }
    if cli_args.indent is not None:
        overrides["indent_width"] = cli_args.indent
    if cli_args.eol is not None:
        overrides["eol"] = cli_args.eol
    if cli_args.no_space_in_delims:
        overrides["normalize_delimiter_spacing"] = False
    if cli_args.perltidy:
        overrides["perltidy_path"] = cli_args.perltidy
    return Config(**overrides)
def setup_logging(level_name: Optional[str], verbose_flag: bool) -> None:
    """Configure root logging for the CLI.

    An explicit *level_name* wins; otherwise *verbose_flag* selects "info" and
    the fallback is "error". Unknown level names also fall back to ERROR.
    """
    levels = {
        "error": logging.ERROR,
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
        "critical": logging.CRITICAL,
    }
    if level_name:
        chosen = level_name.lower()
    else:
        chosen = "info" if verbose_flag else "error"
    logging.basicConfig(
        level=levels.get(chosen, logging.ERROR),
        format="mojofmt: %(levelname)s: %(message)s",
    )
def detect_eol(text: str) -> str:
    """Return "crlf" if *text* contains any CRLF pair, otherwise "lf"."""
    return "crlf" if "\r\n" in text else "lf"
def normalize_eol(text: str, eol: str) -> str:
    """Convert every line ending in *text* to the style named by *eol*.

    "preserve" returns *text* untouched. "crlf" yields CRLF endings. "lf" and
    any unrecognized value yield LF endings.
    """
    if eol == "preserve":
        return text
    # Collapse CRLF first so the lone-CR pass cannot double-convert it.
    unified = text.replace("\r\n", "\n").replace("\r", "\n")
    if eol == "crlf":
        return unified.replace("\n", "\r\n")
    return unified
_PERLTIDY_WARNED = False  # avoid spamming logs if perltidy is missing repeatedly


def run_perltidy(code: str, cfg: Config) -> Tuple[int, str, str]:
    """Pipe *code* through perltidy and return ``(returncode, stdout, stderr)``.

    When perltidy cannot be located the result is ``(127, code, message)``;
    when it is found but cannot be launched (e.g. not executable) the result
    is ``(126, code, message)``. In both cases the input is echoed back as
    stdout, so callers that test ``rc != 0`` can fall back to the original.
    """
    global _PERLTIDY_WARNED

    exe = cfg.perltidy_path or shutil.which("perltidy")
    if not exe:
        if not _PERLTIDY_WARNED:
            logger.error("perltidy not found; Perl inside template will not be reformatted")
            _PERLTIDY_WARNED = True
        return (127, code, "perltidy not found")

    args: List[str] = [exe]
    if cfg.perltidy_options:
        args += cfg.perltidy_options
        # perltidy must write to stdout for us to capture the result.
        if not any(opt.startswith("-st") for opt in cfg.perltidy_options):
            args.append("-st")
    else:
        args += [
            f"-i={cfg.indent_width}",
            f"-ci={cfg.indent_width}",
            "-l=100",
            "-q",
            "-se",
            "-st",
            "-nbbc",
            "-noll",
        ]
    logger.debug("Running perltidy: %s", " ".join(args))

    try:
        proc = subprocess.run(
            args,
            input=code,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        if not _PERLTIDY_WARNED:
            logger.error("perltidy not found while executing")
            _PERLTIDY_WARNED = True
        return (127, code, "perltidy not found")
    except OSError as exc:
        # E.g. PermissionError for a --perltidy path that is not executable.
        if not _PERLTIDY_WARNED:
            logger.error("perltidy could not be executed: %s", exc)
            _PERLTIDY_WARNED = True
        return (126, code, str(exc))

    if proc.returncode != 0:
        logger.debug("perltidy non-zero exit %s: %s", proc.returncode, (proc.stderr or "").strip())
    return (proc.returncode, proc.stdout, proc.stderr)
def perltidy_probe(cfg: Config) -> Tuple[bool, str]:
    """Check that perltidy is callable and actually reformats code.

    Returns ``(ok, message)``. Output that differs from the expected default
    style but is non-empty and changed from the input still counts as OK.
    """
    exe = cfg.perltidy_path or shutil.which("perltidy")
    if not exe:
        return (False, "perltidy not found on PATH (install Perl::Tidy or pass --perltidy)")

    snippet = "my $x= { a=>1,b =>2 };"
    rc, out, err = run_perltidy(snippet, cfg)
    if rc != 0:
        return (False, f"perltidy exit {rc}: {(err or '').strip()}")

    expected_fragments = ("my $x = {", "a => 1", "b => 2")
    if all(fragment in out for fragment in expected_fragments):
        return (True, f"perltidy OK: {exe}")

    produced = out.strip() if out else ""
    if produced and produced != snippet:
        return (True, f"perltidy OK (non-default style): {exe}")
    return (False, "perltidy produced unexpected output")
def tidy_perl_statement_oneline(code: str, cfg: Config) -> str:
    """Tidy a Perl statement and flatten the result onto a single line.

    If perltidy fails, the original *code* is flattened instead.
    """
    rc, tidied, _ = run_perltidy(code, cfg)
    text = code if rc != 0 else tidied
    # Join perltidy's lines with single spaces, then squeeze blank runs.
    flat = " ".join(text.strip().splitlines())
    flat = WS_RE.sub(" ", flat).strip()
    return enforce_perl_keyword_spacing(flat, cfg.perl_keyword_spacing)
def tidy_perl_expression(code: str, cfg: Config) -> str:
    """Tidy a Perl expression (the body of ``<%= %>`` / ``%=``) onto one line.

    The expression is wrapped in ``do { ... }`` so perltidy sees a complete
    statement; the text between the outermost braces is then extracted. If
    perltidy fails, or its output has no balanced brace pair, the stripped
    original is used.
    """
    fallback = code.strip()
    rc, out, _ = run_perltidy(f"do {{ {code} }}", cfg)
    if rc != 0:
        return enforce_perl_keyword_spacing(fallback, cfg.perl_keyword_spacing)

    inner = fallback
    start = out.find("{")
    if start != -1:
        # Plain brace counting; braces inside Perl strings are not special-cased.
        depth = 0
        for pos in range(start, len(out)):
            ch = out[pos]
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    inner = out[start + 1 : pos]
                    break

    inner = " ".join(part.strip() for part in inner.splitlines())
    inner = WS_RE.sub(" ", inner).strip()
    return enforce_perl_keyword_spacing(inner, cfg.perl_keyword_spacing)
def tidy_perl_block_multiline(code: str, cfg: Config) -> Optional[str]:
    """
    Format a multi-line chunk of Perl by wrapping it in a do { ... } block for perltidy.

    Returns the formatted inner text (without the wrapper and without the
    newline directly inside each brace), or None when perltidy fails, prints
    nothing, or its output has no balanced outer brace pair.
    """
    rc, out, _ = run_perltidy("do {\n" + code + "\n}", cfg)
    if rc != 0 or not out:
        return None

    start = out.find("{")
    if start == -1:
        return None

    # Walk forward to the brace that closes the wrapper.
    depth = 0
    for pos in range(start, len(out)):
        ch = out[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                break
    else:
        return None

    inner = out[start + 1 : pos]
    if inner.startswith("\n"):
        inner = inner[1:]
    if inner.endswith("\n"):
        inner = inner[:-1]
    return inner
def _split_code_and_strings ( s : str ) :
chunks = [ ]
buf : List [ str ] = [ ]
in_single = in_double = False
i = 0
while i < len ( s ) :
ch = s [ i ]
if not in_single and not in_double :
if ch == " ' " :
if buf :
chunks . append ( ( " code " , " " . join ( buf ) ) )
buf = [ ]
in_single = True
buf . append ( ch )
elif ch == ' " ' :
if buf :
chunks . append ( ( " code " , " " . join ( buf ) ) )
buf = [ ]
in_double = True
buf . append ( ch )
else :
buf . append ( ch )
elif in_single :
buf . append ( ch )
if ch == " \\ " :
if i + 1 < len ( s ) :
buf . append ( s [ i + 1 ] ) ; i + = 1
elif ch == " ' " :
chunks . append ( ( " str " , " " . join ( buf ) ) ) ; buf = [ ] ; in_single = False
elif in_double :
buf . append ( ch )
if ch == " \\ " :
if i + 1 < len ( s ) :
buf . append ( s [ i + 1 ] ) ; i + = 1
elif ch == ' " ' :
chunks . append ( ( " str " , " " . join ( buf ) ) ) ; buf = [ ] ; in_double = False
i + = 1
if buf :
chunks . append ( ( " code " if not ( in_single or in_double ) else " str " , " " . join ( buf ) ) )
return chunks
def _split_unquoted_comment ( code_chunk : str ) :
idx = code_chunk . find ( " # " )
if idx == - 1 :
return code_chunk , None
return code_chunk [ : idx ] , code_chunk [ idx : ]
def enforce_perl_keyword_spacing(s: str, enable: bool) -> str:
    """Insert conventional spaces after Perl keywords in a one-line snippet.

    Does nothing when *enable* is false or *s* is empty. Quoted strings and
    trailing ``#`` comments are passed through untouched; only the remaining
    code is rewritten (``if(`` -> ``if (``, ``my$x`` -> ``my $x``,
    ``){`` -> ``) {``, ``}else`` -> ``} else`` and so on).
    """
    if not enable or not s:
        return s

    call_kw = r"(?P<kw>return|print|say|die|warn|exit)"
    # (pattern, replacement) pairs, applied in this order.
    rules = [
        # Control keywords before '('.
        (re.compile(r"\b(?P<kw>if|elsif|unless|while|until|for|foreach|given|when)\s*\("), r"\g<kw> ("),
        # Declarators before a sigil or paren.
        (re.compile(r"\b(?P<kw>my|our|state|local)\s*(?=[\$\@\%\*\&\\\(])"), r"\g<kw> "),
        # Named subs. Whitespace after `sub` is required so that identifiers
        # merely starting with "sub" (substr, subject) are left alone.
        (re.compile(r"\bsub\s+([A-Za-z_]\w*)"), r"sub \1"),
        (re.compile(r"\bsub\s+([A-Za-z_]\w*)\s*\{"), r"sub \1 {"),
        (re.compile(r"\bsub\s*\{"), "sub {"),
        # Calls that often appear without a space.
        (re.compile(r"\b" + call_kw + r"\s*\("), r"\g<kw> ("),
        # The trailing \b stops printf/exits/warning from being split, and the
        # lookahead avoids producing "return ;" or "die ,".
        (re.compile(r"\b" + call_kw + r"\b\s*(?=[^\s;,)])"), r"\g<kw> "),
        # "}else" -> "} else"
        (re.compile(r"\}\s*(?=\b(?:else|elsif|continue|when)\b)"), "} "),
        # else/continue/do/eval blocks.
        (re.compile(r"\b(?P<kw>else|continue|do|eval)\s*\{"), r"\g<kw> {"),
        # "){" -> ") {"
        (re.compile(r"\)\s*\{"), ") {"),
    ]

    pieces: List[str] = []
    for kind, chunk in _split_code_and_strings(s):
        if kind != "code":
            pieces.append(chunk)
            continue
        code, comment = _split_unquoted_comment(chunk)
        for pattern, replacement in rules:
            code = pattern.sub(replacement, code)
        pieces.append(code + (comment or ""))
    return "".join(pieces)
def _common_leading_ws ( lines : List [ str ] ) - > str :
ws = None
for ln in lines :
if not ln . strip ( ) :
continue
lead = len ( ln ) - len ( ln . lstrip ( ' \t ' ) )
s = ln [ : lead ]
if ws is None :
ws = s
else :
i = 0
while i < len ( ws ) and i < len ( s ) and ws [ i ] == s [ i ] :
i + = 1
ws = ws [ : i ]
return ws or " "
def _dedent_block ( text : str ) - > str :
lines = text . splitlines ( )
# Trim leading/trailing all-whitespace lines
while lines and not lines [ 0 ] . strip ( ) :
lines . pop ( 0 )
while lines and not lines [ - 1 ] . strip ( ) :
lines . pop ( )
if not lines :
return " "
prefix = _common_leading_ws ( lines )
if not prefix :
return " \n " . join ( lines )
plen = len ( prefix )
out = [ ]
for ln in lines :
out . append ( ln [ plen : ] if ln . startswith ( prefix ) else ln )
return " \n " . join ( out )
def _naive_perl_indent ( code : str , width : int = 2 ) - > str :
lines = code . splitlines ( )
indent = 0
out = [ ]
for raw in lines :
ln = raw . rstrip ( )
if not ln :
out . append ( " " )
continue
stripped = ln . lstrip ( )
# dedent on leading closing braces
leading_closes = 0
i = 0
while i < len ( stripped ) and stripped [ i ] == ' } ' :
leading_closes + = 1
i + = 1
indent_before = max ( 0 , indent - leading_closes )
out . append ( ( " " * ( indent_before * width ) ) + stripped )
opens = ln . count ( " { " )
closes = ln . count ( " } " )
indent + = ( opens - closes )
if indent < 0 :
indent = 0
return " \n " . join ( out )
def normalize_tpl_tag(
    leftchomp: Optional[str],
    kind: Optional[str],
    body: str,
    rightchomp: Optional[str],
    cfg: Config,
) -> Tuple[str, str, str, str, str]:
    """Build the pieces of an inline ``<% ... %>`` tag.

    Returns ``(open, leftchomp, kind, body, close)``; concatenating the five
    parts in order yields the complete tag.

    * Comment tags (kind "#"), or any tag when delimiter normalization is off,
      pass through: open is the bare "<%", chomp/kind are returned separately
      and the body is untouched.
    * Otherwise the chomp marker and kind are folded into open/close with one
      space of padding, the middle two slots are empty, and the body is
      stripped.
    """
    left = leftchomp or ""
    right = rightchomp or ""
    tag_kind = kind or ""

    if not cfg.normalize_delimiter_spacing or kind == "#":
        return ("<%", left, tag_kind, body, right + "%>")

    return ("<%" + left + tag_kind + " ", "", "", body.strip(), " " + right + "%>")
def substitute_tpl_tags_in_line(line: str, cfg: Config) -> str:
    """Rewrite every inline ``<% ... %>`` tag on *line* with tidied Perl.

    Text between tags is copied unchanged. Comment tags keep their body
    verbatim; expression tags (``=``/``==``) and statement tags are tidied by
    the matching helper.
    """
    pieces: List[str] = []
    cursor = 0
    for m in TPL_TAG_RE.finditer(line):
        pieces.append(line[cursor : m.start()])
        leftchomp = m.group("leftchomp") or ""
        kind = m.group("kind") or ""
        body = m.group("body")
        rightchomp = m.group("rightchomp") or ""

        open_part, chomp_part, kind_part, _, close_part = normalize_tpl_tag(
            leftchomp, kind, body, rightchomp, cfg
        )
        if kind == "#":
            inner = body
        elif kind in ("=", "=="):
            inner = tidy_perl_expression(body, cfg)
        else:
            inner = tidy_perl_statement_oneline(body, cfg)

        # chomp_part/kind_part are non-empty only on the pass-through path
        # (comments, or normalization disabled). Dropping them would turn
        # "<%# note %>" into executable "<% note %>" and "<%= x %>" into "<% x %>".
        pieces.append(open_part + chomp_part + kind_part + inner + close_part)
        cursor = m.end()
    pieces.append(line[cursor:])
    return "".join(pieces)
def derive_html_tag_deltas(line_wo_tpl: str) -> Tuple[int, int, Optional[str], Optional[str]]:
    """
    Return (pre_dedent, net_total, raw_open, raw_close):
    - pre_dedent: end tags at beginning of line (dedent before printing)
    - net_total: total start tags (+1) minus end tags (-1) across the line for
      non-void, non-self-closing tags
    - raw_open, raw_close: lower-cased raw element name when the line consists
      solely of that element's opening/closing tag, else None
    """
    s = line_wo_tpl
    length = len(s)

    m_open = RAW_OPEN_RE.match(s)
    m_close = RAW_CLOSE_RE.match(s)
    raw_open = m_open.group("name").lower() if m_open else None
    raw_close = m_close.group("name").lower() if m_close else None

    # Count the run of closing tags that starts the line.
    pos = 0
    while pos < length and s[pos].isspace():
        pos += 1
    pre_dedent = 0
    while True:
        m = TAG_RE.match(s, pos)
        if m is None or not m.group("slash"):
            break
        pre_dedent += 1
        pos = m.end()
        while pos < length and s[pos].isspace():
            pos += 1

    # Net depth change across the whole line.
    net = 0
    for m in TAG_RE.finditer(s):
        if m.group("slash"):
            net -= 1
        elif not m.group("self") and (m.group("name") or "").lower() not in VOID_ELEMENTS:
            net += 1
    return pre_dedent, net, raw_open, raw_close
def strip_tpl_tags(line: str) -> str:
    """Blank out inline template tags with spaces, keeping column positions."""
    return TPL_TAG_RE.sub(lambda m: " " * len(m.group(0)), line)
def is_standalone_statement_tag(line: str) -> bool:
    """Return True if the stripped line runs from ``<%`` to ``%>`` and does
    not begin with an expression tag (``<%=`` or ``<%==``).
    """
    s = line.strip()
    if not (s.startswith("<%") and s.endswith("%>")):
        return False
    # "<%=" is also a prefix of "<%==", so one check covers both.
    return not s.startswith("<%=")
def compute_perl_deltas(line: str) -> Tuple[int, int]:
    """
    Return (perl_dedent_before, perl_delta_after_for_next_line).

    Only line directives (starting with %) and standalone <% ... %> statement
    lines affect Perl depth. Also accounts for % end / <% end %> and begin
    blocks. Comment directives and comment tags are ignored.

    The caller applies ``delta_after`` on top of the already-dedented depth.
    Closing braces that start the line are therefore reported once, in
    ``dedent_before``, and excluded from ``delta_after``. Otherwise ``% }``
    would drop two levels and ``% } else {`` would lose the level it reopens.
    """
    dedent_before = 0
    if END_LINE_RE.match(line) or END_TAG_ONLY_RE.match(line):
        dedent_before += 1

    directive = LINE_DIR_RE.match(line)
    if directive:
        if directive.group("kind") == "#":
            return dedent_before, 0
        bodies = [directive.group("body")]
    elif is_standalone_statement_tag(line):
        bodies = [
            "" if m.group("kind") == "#" else (m.group("body") or "")
            for m in TPL_TAG_RE.finditer(line)
        ]
    else:
        return dedent_before, 0

    # Closing braces that open the first body dedent this very line.
    head = bodies[0].lstrip() if bodies else ""
    leading_closes = len(head) - len(head.lstrip("}"))

    opens = sum(b.count("{") for b in bodies)
    closes = sum(b.count("}") for b in bodies)

    dedent_before += leading_closes
    delta_after = opens - (closes - leading_closes)
    if BEGIN_RE.search(line):
        delta_after += 1
    return dedent_before, delta_after
def format_line_directive(line: str, cfg: Config) -> Optional[str]:
    """
    If the line is a Mojolicious line directive (% ...), return a formatted
    directive string WITHOUT leading indentation (indent applied separately).
    Otherwise return None.

    Comment directives keep their text; it is trimmed to one leading space
    only when delimiter normalization is on. Code directives are tidied and
    emitted as "%<kind> <code>", or just "%<kind>" when the code is empty.
    """
    m = LINE_DIR_RE.match(line)
    if not m:
        return None
    kind = m.group("kind") or ""
    body = m.group("body")

    if kind == "#":
        if not cfg.normalize_delimiter_spacing:
            return "%#" + body
        trimmed = body.strip()
        return "%#" + ((" " + trimmed) if trimmed else "")

    if kind in ("=", "=="):
        inner = tidy_perl_expression(body, cfg)
    else:
        inner = tidy_perl_statement_oneline(body, cfg)
    # The original had two identical branches on normalize_delimiter_spacing
    # here; they are merged since the result did not depend on the flag.
    return "%" + kind + ((" " + inner) if inner else "")
def rstrip_trailing_ws(line: str) -> str:
    """Remove trailing spaces and tabs (other characters are left alone)."""
    return line.rstrip(" \t")
def format_extended_perl_blocks(text: str, cfg: Config) -> str:
    """
    Detect blocks where <% and %> are on their own lines (with optional chomp
    markers), format the inner Perl with perltidy (wrapped in do { ... }) or a
    naive indenter, and reinsert it with the original base indentation.

    An opening line with no closing line after it is copied through as-is.
    A trailing newline on *text* is preserved.
    """
    lines = text.splitlines()
    total = len(lines)
    out: List[str] = []
    i = 0
    while i < total:
        m_open = OPEN_BLOCK_RE.match(lines[i])
        if not m_open:
            out.append(lines[i])
            i += 1
            continue

        # Find the closing delimiter.
        close = None
        j = i + 1
        while j < total:
            close = CLOSE_BLOCK_RE.match(lines[j])
            if close:
                break
            j += 1
        if close is None:
            out.append(lines[i])
            i += 1
            continue

        base = m_open.group("base") or ""
        left = m_open.group("left") or ""
        right = close.group("right") or ""

        # Dedent before formatting so the formatter sees column-0 code.
        inner = _dedent_block("\n".join(lines[i + 1 : j]))

        # Try perltidy; fall back to naive indentation.
        tidied = tidy_perl_block_multiline(inner, cfg)
        if tidied is None:
            logger.debug("EP block %d-%d: perltidy failed/unavailable; using naive indenter", i + 1, j + 1)
            tidied = _naive_perl_indent(inner, width=cfg.indent_width)
        else:
            logger.debug("EP block %d-%d: perltidy formatted (%d lines)", i + 1, j + 1, len(tidied.splitlines()))
        tidied = tidied.rstrip("\n")

        out.append(f"{base}<%{left}")
        # Blank lines stay empty rather than becoming whitespace-only lines.
        out.extend((base + ln) if ln else "" for ln in tidied.splitlines())
        out.append(f"{base}{right}%>")
        i = j + 1  # continue after the closing line

    return "\n".join(out) + ("\n" if text.endswith("\n") else "")
def format_string(src: str, cfg: Config) -> str:
    """Format a complete template and return the result.

    Each line is re-indented by the sum of the current HTML depth and Perl
    depth, and its directives/inline tags are tidied. Lines inside a raw
    element (pre/script/style/textarea opened on a line of its own) are copied
    verbatim. Multi-line ``<% ... %>`` blocks are then reformatted, a final
    newline is ensured, and line endings are converted per ``cfg.eol``.
    """
    original_eol = detect_eol(src)
    text = src.replace("\r\n", "\n").replace("\r", "\n")

    html_depth = 0
    perl_depth = 0
    in_raw: Optional[str] = None  # name of the raw element we are inside, if any
    out_lines: List[str] = []

    for line in text.split("\n"):
        if in_raw:
            m_close = RAW_CLOSE_RE.match(line)
            if m_close and m_close.group("name").lower() == in_raw:
                # The closing tag lines up with its opening tag.
                level = max(0, html_depth - 1) + perl_depth
                closing = " " * (cfg.indent_width * level) + line.lstrip()
                out_lines.append(rstrip_trailing_ws(closing))
                html_depth = max(0, html_depth - 1)
                in_raw = None
            else:
                out_lines.append(line)
            continue

        perl_dedent_before, perl_delta_after = compute_perl_deltas(line)
        # HTML tags are counted with template tags blanked out.
        html_pre_dedent, html_net, raw_open, _raw_close = derive_html_tag_deltas(
            strip_tpl_tags(line)
        )

        base_html_depth = max(0, html_depth - html_pre_dedent)
        base_perl_depth = max(0, perl_depth - perl_dedent_before)
        level = max(0, base_html_depth + base_perl_depth)
        indent = " " * (cfg.indent_width * level)

        content = format_line_directive(line, cfg)
        if content is None:
            content = substitute_tpl_tags_in_line(line, cfg).lstrip()
        out_lines.append(rstrip_trailing_ws(indent + content.lstrip()))

        # html_net already includes the leading end tags taken off above, so
        # add them back to avoid subtracting them twice.
        html_depth = max(0, base_html_depth + html_net + html_pre_dedent)
        if raw_open and raw_open.lower() in RAW_ELEMENTS:
            in_raw = raw_open.lower()
        perl_depth = max(0, base_perl_depth + perl_delta_after)

    # Post-pass: format extended <% ... %> blocks.
    result = format_extended_perl_blocks("\n".join(out_lines), cfg)
    if not result.endswith("\n"):
        result += "\n"

    eol_mode = original_eol if cfg.eol == "preserve" else cfg.eol
    return normalize_eol(result, eol_mode)
def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text without translating line endings.

    Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
    raising.
    """
    raw = path.read_bytes()
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode(errors="replace")
def write_text(path: Path, text: str) -> None:
    """Write *text* to *path* as UTF-8 bytes, leaving line endings as given."""
    path.write_bytes(text.encode("utf-8"))
def is_supported_file(path: Path, exts: Tuple[str, ...]) -> bool:
    """Return True if the lower-cased file name ends with any suffix in *exts*."""
    lowered = path.name.lower()
    for suffix in exts:
        if lowered.endswith(suffix):
            return True
    return False
def iter_files(paths: List[str], exts: Tuple[str, ...]) -> Iterable[Path]:
    """Yield every supported template file named by, or found beneath, *paths*.

    Directories are walked recursively in sorted order, so results are
    reproducible between runs. Non-directory paths are yielded only if their
    name has a supported suffix; anything else is skipped silently.
    """
    for given in paths:
        target = Path(given)
        if target.is_dir():
            for root, dirs, files in os.walk(target):
                dirs.sort()  # in-place sort steers os.walk's descent order
                for filename in sorted(files):
                    candidate = Path(root) / filename
                    if is_supported_file(candidate, exts):
                        logger.debug("Found file: %s", candidate)
                        yield candidate
        elif is_supported_file(target, exts):
            logger.debug("Found file: %s", target)
            yield target
def unified_diff(a: str, b: str, path: Path) -> str:
    """Return a unified diff from *a* to *b*, labelled with *path*.

    The "to" side is labelled "<path> (formatted)". Returns "" when the two
    texts are identical.
    """
    label = str(path)
    hunks = difflib.unified_diff(
        a.splitlines(keepends=True),
        b.splitlines(keepends=True),
        fromfile=label,
        tofile=label + " (formatted)",
    )
    return "".join(hunks)
def process_file(path: Path, cfg: Config, write: bool, show_diff: bool, backup: bool = False) -> Tuple[bool, str]:
    """Format one file and return ``(changed, formatted_text)``.

    When the content changes: optionally print a unified diff to stdout and,
    if *write* is set, overwrite the file. With *backup*, the original is first
    saved next to it as "<name>.bak". Unchanged files are never written.
    """
    original = read_text(path)
    formatted = format_string(original, cfg)
    changed = formatted != original

    if not changed:
        logger.info("Unchanged: %s", path)
        return changed, formatted

    logger.info("Formatted: %s", path)
    if show_diff:
        sys.stdout.write(unified_diff(original, formatted, path))
    if write:
        if backup:
            bak_path = path.with_name(path.name + ".bak")
            write_text(bak_path, original)
            logger.info("Backup written: %s", bak_path)
        write_text(path, formatted)
        logger.info("Overwritten: %s", path)
    return changed, formatted
def process_stdin_stdout(cfg: Config) -> int:
    """Format all of stdin, write the result to stdout, and return 0."""
    sys.stdout.write(format_string(sys.stdin.read(), cfg))
    logger.info("Formatted stdin to stdout")
    return 0
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for mojofmt."""
    p = argparse.ArgumentParser(description="Format Mojolicious templates (.ep, .htm.ep, .html.ep)")
    p.add_argument("paths", nargs="*", help="Files or directories")

    # Output modes.
    p.add_argument("-w", "--write", action="store_true", help="Overwrite files in place (writes a .bak backup)")
    p.add_argument("-o", "--out", help="Write formatted output to this file (single input file or --stdin). Conflicts with --write/--check/--diff")
    p.add_argument("--check", action="store_true", help="Exit non-zero if any file would change")
    p.add_argument("--diff", action="store_true", help="Print unified diff for changes")
    p.add_argument("--stdin", action="store_true", help="Read from stdin")
    p.add_argument("--stdout", action="store_true", help="Write to stdout (with --stdin)")

    # Formatting options. A literal "%" in help text must be doubled for argparse.
    p.add_argument("--perltidy", help="Path to perltidy executable (defaults to PATH)")
    p.add_argument("--indent", type=int, help="Indent width (spaces, default 2)")
    p.add_argument("--eol", choices=["lf", "crlf", "preserve"], default="lf", help="EOL handling (default lf)")
    p.add_argument("--no-space-in-delims", action="store_true", help="Do not normalize spaces inside <%% %%> delimiters")
    p.add_argument("--perl-keyword-spacing", action="store_true", help="Aggressively insert a space after Perl keywords (if(...)->if (...), my$->my $, return(...)->return (...), etc.)")

    # Diagnostics.
    p.add_argument("--self-test", dest="self_test", action="store_true", help="Run internal sanity checks and exit 0/1")
    p.add_argument("--log-level", choices=["error", "info", "debug"], help="Logging level (default error)")
    p.add_argument("--verbose", action="store_true", help="Shorthand for --log-level info")
    p.add_argument("--version", action="store_true", help="Print version and exit")
    return p
def self_test(cfg: Config) -> int:
    """Run built-in sanity checks; return 0 if all pass, 1 otherwise.

    Failures are collected and logged together at the end. A missing or
    misbehaving perltidy counts as a failure.
    """
    failures: List[str] = []

    def check(name: str, cond: bool, detail: Optional[str] = None):
        if not cond:
            failures.append(name + (": " + detail if detail else ""))

    # T0: perltidy availability and behavior
    ok, msg = perltidy_probe(cfg)
    if ok:
        logger.info(msg)
    else:
        failures.append("perltidy: " + msg)

    # T1: idempotence on a mixed template
    src_a = "% if (1) {\n<ul>\n% for my $i (1..2) {\n<li><%= $i %></li>\n% }\n</ul>\n% }\n"
    fmt_a1 = format_string(src_a, cfg)
    fmt_a2 = format_string(fmt_a1, cfg)
    check("idempotence", fmt_a1 == fmt_a2)

    # T2: chomp markers preserved
    fmt_b = format_string("<li><%= $title -%>\n<%= $sub %></li>\n", cfg)
    check("chomp presence", "-%>" in fmt_b)
    check("no-left-chomp-added", "<%-" not in fmt_b)

    # T3: raw element inner content unchanged
    src_c = "<script>\n  var x=1; // keep spacing\n  if(true) { console.log(x)}\n</script>\n"
    fmt_c = format_string(src_c, cfg)
    c_lines = src_c.splitlines()
    f_lines = fmt_c.splitlines()
    if len(c_lines) >= 3 and len(f_lines) >= 3:
        check("raw inner unchanged", c_lines[1:-1] == f_lines[1:-1], detail=f"got {f_lines[1:-1]!r}")
    else:
        check("raw structure", False, "unexpected line count")

    # T4: delimiter spacing normalization for <% %>
    fmt_d = format_string("<%my $x=1;%>\n", cfg)
    check("delimiter spacing", "<% " in fmt_d and " %>" in fmt_d)

    # T5: keyword spacing with flag on
    cfg_kw = dc_replace(cfg, perl_keyword_spacing=True)
    fmt_k1 = format_string("<% if($x){ %>\n", cfg_kw)
    check("kw if(...)", "if (" in fmt_k1 and "{" in fmt_k1)
    fmt_k2 = format_string("<%= return(1) %>\n", cfg_kw)
    check("kw return(...)", "return (" in fmt_k2)
    fmt_k3 = format_string('<% say"hi"; %>\n', cfg_kw)
    check("kw say \"...\"", 'say "' in fmt_k3)
    fmt_k4 = format_string("<% my($x,$y)=@_; %>\n", cfg_kw)
    check("kw my $", "my (" in fmt_k4 and "= @_" in fmt_k4)
    fmt_k5 = format_string("<% sub foo{ %>\n", cfg_kw)
    check("kw sub foo {", "sub foo {" in fmt_k5)

    # T6: extended EP block formatting
    src_e = "<%\n  my $x=1;\nif($x){\nsay \"hi\";\n}\n%>\n"
    fmt_e = format_string(src_e, cfg)
    check(
        "extended block indented",
        ("if (" in fmt_e and "say" in fmt_e and "{\n" in fmt_e) or ("if(" not in fmt_e),
    )

    if failures:
        logger.error("SELF-TEST FAILURES:")
        for failure in failures:
            logger.error(" - %s", failure)
        return 1
    logger.info("Self-test passed")
    return 0
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point; returns the process exit status.

    Modes are handled in this order: --version, --self-test, --out (from
    --stdin or exactly one input file), --stdin to stdout, then per-file
    processing of the given paths. In per-file mode the result is 1 when
    --check is set and any file would change, or when any file raised an
    error; otherwise 0.
    """
    parser = build_arg_parser()
    args = parser.parse_args(argv)
    setup_logging(args.log_level, args.verbose)

    if args.version:
        print(f"mojofmt {VERSION}")
        return 0

    if args.self_test:
        return self_test(load_config(args))

    # Validate --out usage.
    if args.out:
        if args.write or args.check or args.diff:
            parser.error("--out conflicts with --write/--check/--diff")
        cfg = load_config(args)
        out_path = Path(args.out)
        if args.stdin:
            write_text(out_path, format_string(sys.stdin.read(), cfg))
            logger.info("Wrote %s (from stdin)", out_path)
            return 0
        # Must be exactly one input file.
        if not args.paths or len(args.paths) != 1:
            parser.error("--out requires exactly one input file (or use --stdin)")
        in_path = Path(args.paths[0])
        write_text(out_path, format_string(read_text(in_path), cfg))
        logger.info("Wrote %s (from %s)", out_path, in_path)
        return 0

    cfg = load_config(args)
    if args.stdin:
        return process_stdin_stdout(cfg)
    if not args.paths:
        parser.error("No input paths provided (or use --stdin).")

    any_changed = False
    any_error = False
    for path in iter_files(args.paths, cfg.extensions):
        try:
            changed, _ = process_file(path, cfg, write=args.write, show_diff=args.diff, backup=args.write)
        except Exception as e:
            # Top-level boundary: report the failure and continue with other files.
            any_error = True
            logger.error("Error processing %s: %s", path, e)
        else:
            any_changed = any_changed or changed

    if args.check and any_changed:
        return 1
    return 1 if any_error else 0
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())