# Source code for pyaota.generator.yaml2tex

"""
LaTeX rendering helpers for pyaota.
"""

import re

import logging

logger = logging.getLogger(__name__)

# ---------- Normalization helpers ----------

def normalize_punctuation(s: str) -> str:
    """Map Word-style Unicode punctuation onto ASCII/TeX-friendly text.

    Ellipses, curly quotes, en/em dashes, the non-breaking hyphen and
    superscript digits are rewritten; every other character is kept as is.
    """
    table = {
        "…": "...",
        "“": '"',
        "”": '"',
        "‘": "'",
        "’": "'",
        "–": "-",
        "—": "--",
        "\u2011": "-",  # non-breaking hyphen
    }
    # Superscript digits 0-9, listed in numeric order so the index is the digit.
    superscript_digits = (
        "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079"
    )
    for digit, glyph in enumerate(superscript_digits):
        table[glyph] = rf"\textsuperscript{{{digit}}}"
    # None of the replacement texts contains a key character, so one
    # translate pass yields the same result as replacing key by key.
    return s.translate(str.maketrans(table))
def tex_escape(s: str) -> str:
    """Normalize Unicode punctuation in *s*, then escape it as LaTeX text."""
    return tex_escape_plain(normalize_punctuation(s))
def tex_escape_plain(s: str) -> str:
    r"""
    Escape LaTeX special characters in *normal text* context.

    Runs of underscores are treated as fill-in blanks: 4 or more become
    ``\blank{}`` and runs of 2-3 become ``\smallblank{}``.  A lone
    underscore is escaped as ``\_`` like any other special character.

    Returns the escaped string.
    """
    specials = {
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    }

    def _substitute(match: re.Match) -> str:
        token = match.group(0)
        if len(token) >= 4:
            return r'\blank{}'
        if len(token) >= 2:
            return r'\smallblank{}'
        return specials[token]

    # One pass over the input: underscore runs and single special characters
    # are rewritten directly, so no sentinel text is needed and input that
    # happens to contain sentinel-like text (e.g. "<<BLANK>>") stays literal.
    return re.sub(r'_{2,}|[\\&%$#_{}~^]', _substitute, s)
def tex_escape_texttt(s: str) -> str:
    r"""
    Escape content destined for \texttt{...} (non-verbatim monospace).

    In contrast to tex_escape_inline_code, the backslash itself is escaped:
    \texttt processes its argument as ordinary LaTeX, so a sequence such as
    \n would otherwise be read as an undefined control sequence.
    """
    table = str.maketrans({
        '\\': r'\textbackslash{}',
        '$': r'\$',
        '%': r'\%',
        '#': r'\#',
        '&': r'\&',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })
    return s.translate(table)
def tex_escape_inline_code(s: str) -> str:
    r"""
    Escape content that will go inside \inl{...}.

    - normalize Word-style punctuation first
    - escape ^, %, _, {, } so TeX doesn't get confused

    Backslashes are deliberately left untouched here.
    """
    s = normalize_punctuation(s)
    # Lookup is per character, so only single-character keys can ever match;
    # a doubled caret is covered by escaping each caret individually.
    replacements = {
        '^': r'\^',
        '%': r'\%',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
    }
    return ''.join(replacements.get(ch, ch) for ch in s)
# ---------- Render helpers ----------
def render_text(s: str, default_style: str | None = None) -> str:
    r"""
    Convert a text string with ``inline code`` and ``@@inline math@@``
    markers into LaTeX.

    - ``code`` regions become \inl{...} when *default_style* is set
      (listings mode) and \texttt{...} otherwise; \verb is never used here
      because it cannot appear inside command arguments such as \choice{...}.
    - ``@@math@@`` regions are emitted unescaped as ``$...$``.
    - All remaining text is escaped for normal text context.
    """
    normalized = normalize_punctuation(s)
    # A single alternation finds both marker kinds in document order.
    marker = re.compile(r'@@(.*?)@@|``(.*?)``')

    pieces: list[str] = []
    cursor = 0
    for hit in marker.finditer(normalized):
        plain = normalized[cursor:hit.start()]
        if plain:
            pieces.append(tex_escape_plain(plain))

        math_src, code_src = hit.groups()
        if math_src is not None:
            # Inline math is passed through raw.
            pieces.append(f"${math_src}$")
        elif default_style:
            pieces.append(f"\\inl{{{tex_escape_inline_code(code_src)}}}")
        else:
            pieces.append(f"\\texttt{{{tex_escape_texttt(code_src)}}}")
        cursor = hit.end()

    remainder = normalized[cursor:]
    if remainder:
        pieces.append(tex_escape_plain(remainder))
    return ''.join(pieces)
_VERB_DELIMITERS = '|@!~+' def _choose_verb_delimiter(code: str) -> str | None: for c in _VERB_DELIMITERS: if c not in code: return c return None
def render_code_block(text: str, style: str | None = None, force_env: bool = True) -> tuple[str, str]:
    r"""
    Render code as an inline or block snippet and return ``(kind, tex)``.

    Leading and trailing blank lines are dropped first.  Inline output is
    only produced for a single remaining line when *force_env* is False;
    everything else (including empty input) is a block.

    Without a *style* the output is plain LaTeX verbatim:
      - ("inline", '\verb|...|'), falling back to \texttt{...} when no
        usable \verb delimiter exists
      - ("block", '\begin{verbatim}...')

    With a listings *style* name:
      - ("inline", '\inl{...}')
      - ("block", '\begin{lstlisting}[style=...]...')

    Inline results start with a newline.  The caller decides how to embed
    the returned TeX.
    """
    lines = normalize_punctuation(text).splitlines()

    # Trim blank lines at both ends by narrowing a [first, last) window.
    first, last = 0, len(lines)
    while first < last and not lines[first].strip():
        first += 1
    while last > first and not lines[last - 1].strip():
        last -= 1
    lines = lines[first:last]

    # Nothing left: emit an empty environment.
    if not lines:
        if style:
            return "block", f"\\begin{{lstlisting}}[style={style}]\n\\end{{lstlisting}}"
        return "block", "\\begin{verbatim}\n\\end{verbatim}"

    # One line and inline allowed.
    if not force_env and len(lines) == 1:
        (only_line,) = lines
        if style:
            return "inline", f"\n\\inl{{{tex_escape_inline_code(only_line)}}}"
        delim = _choose_verb_delimiter(only_line)
        if delim:
            return "inline", f"\n\\verb{delim}{only_line}{delim}"
        # Every candidate delimiter occurs in the code: escape it instead.
        return "inline", f"\n\\texttt{{{tex_escape_texttt(only_line)}}}"

    # Everything else is a block environment.
    body = "\n".join(lines)
    if style:
        opening = f"\\begin{{lstlisting}}[style={style}]\n"
        closing = "\n\\end{lstlisting}"
    else:
        opening = "\\begin{verbatim}\n"
        closing = "\n\\end{verbatim}"
    return "block", opening + body + closing
[docs] def render_stem_block(block: dict, default_style: str | None = None) -> str: """ Render a single stem block (text, code, or image) to LaTeX. """ btype = block.get("type", "text") if btype == "text": txt = block.get("text", "") return render_text(txt, default_style=default_style) elif btype == "code": text = block.get("text", "") # Only use the block's style override when listings mode is active; # in verbatim mode (default_style is None) always use None. style = block.get("style", default_style) if default_style is not None else None kind, tex = render_code_block(text, style) # For stems, just return whatever tex we got (inline or block) return tex elif btype == "image": src = block.get("src", "") width = block.get("width", "") # Convert pixel width to a fraction of linewidth (assume ~500px ≈ full width) lw_frac = 0.5 if width: try: px = int(re.sub(r'[^0-9]', '', width)) lw_frac = min(round(px / 500, 2), 1.0) except (ValueError, TypeError): pass return ( f"\n\\begin{{center}}\n" f"\\includegraphics[width={lw_frac}\\linewidth]{{images/{src}}}\n" f"\\end{{center}}" ) else: return f"% [unhandled stem block type: {btype}]"
def _visual_len(text: str) -> int: """Estimate the visual character width of a raw YAML choice text string. Strips ``...`` inline-code markers and @@...@@ math markers, which add character overhead relative to their rendered width. """ s = re.sub(r'``(.*?)``', r'\1', text) s = re.sub(r'@@(.*?)@@', r'\1', s) return len(s) def _choices_columns(choices: list[dict], max_text_len: int = 40, max_code_len: int = 28) -> int: """Return 2 if all choices are short enough for a 2-column layout, else 1. Code choices use a tighter threshold because monospace glyphs are wider than proportional-font glyphs at the same point size. """ for choice in choices: ctype = choice.get("type", "text") raw = str(choice.get("text", "")) if ctype == "code": lines = [l for l in raw.splitlines() if l.strip()] if not lines: continue if max(len(l) for l in lines) > max_code_len: return 1 else: if _visual_len(raw) > max_text_len: return 1 return 2
def render_choice(
    choice: dict,
    correct_key: str | None = None,
    highlight_correct: bool = False,
    default_style: str | None = None,
) -> str:
    r"""
    Turn one choice dict into its LaTeX representation.

    - type "text": \choice[<label>]{<text>}
    - type "code": an empty \choice[<label>]{} followed by the code, which
      is rendered inline or as a block via render_code_block
    - highlight_correct: when True, the LABEL of the correct choice is
      wrapped in \correctlabel{...}; the body (including \inl) is left
      untouched.
    """
    key = choice["key"]
    raw_text = str(choice.get("text", ""))

    marked_correct = (
        highlight_correct and correct_key is not None and key == correct_key
    )
    label = rf"\correctlabel{{{key}}}" if marked_correct else f"{key}."

    if choice.get("type", "text") != "code":
        # Plain text choice: the rendered body goes inside the argument.
        body = render_text(raw_text, default_style=default_style)
        return rf" \choice[{label}]{{{body}}}"

    # The choice's own style override counts only in listings mode.
    style = choice.get("style", default_style) if default_style is not None else None
    _kind, tex = render_code_block(raw_text, style, force_env=False)
    # The code is emitted after an empty choice body so \lstinline is not
    # nested inside a command argument: pre-tokenized arguments break its
    # verbatim space handling, making spaces inside string literals
    # (e.g. " ") vanish from the rendered output.
    return "\n".join([
        rf" \choice[{label}]{{}}",
        tex.lstrip('\n'),
        "",  # blank line between code choices
    ])
def render_question(q: dict, **kwargs) -> str:
    """
    Render a single question dict to LaTeX, dispatching to the appropriate
    renderer based on question type.

    The "type" key of *q* (default "mcq", case-insensitive) selects
    render_mcq or render_tf; *kwargs* are forwarded unchanged.  For any
    other type a warning is logged and a TeX comment naming the type is
    returned, so the function always returns a string.
    """
    logger.debug(
        "Rendering question ID=%s type=%s kwargs=%s",
        q.get('id', ''), q.get('type', 'mcq'), kwargs,
    )
    qtype = q.get("type", "mcq").lower()
    if qtype == "mcq":
        return render_mcq(q, **kwargs)
    if qtype == "tf":
        return render_tf(q, **kwargs)

    # Unknown type: emit a harmless TeX comment rather than falling through
    # with an implicit None, which would break string joins downstream.
    logger.warning(
        "Unhandled question type %r for question ID=%s", qtype, q.get('id', '')
    )
    return f"% [unhandled question type: {qtype}]"
def _shuffled_choices(choices: list[dict], correct_key: str) -> tuple[list[dict], str]:
    """Return scrambled copies of *choices* and the key now holding the answer.

    Key labels stay in their original positions; everything else in each
    choice (text, type, style, ...) moves together to a random slot.  The
    input dicts are not modified.
    """
    import random

    bodies = [{k: v for k, v in c.items() if k != "key"} for c in choices]
    # Remember which body belongs to the correct answer (by identity, so
    # duplicate texts cannot be confused with one another).
    correct_body = next(
        (body for c, body in zip(choices, bodies) if c.get("key", "") == correct_key),
        None,
    )
    random.shuffle(bodies)

    scrambled: list[dict] = []
    for slot, body in zip(choices, bodies):
        moved = dict(body)
        if "key" in slot:
            moved["key"] = slot["key"]
        if body is correct_body:
            correct_key = slot.get("key", correct_key)
        scrambled.append(moved)
    return scrambled, correct_key


def render_mcq(
    q: dict,
    show_id: bool = False,
    highlight_correct: bool = False,
    scramble_choices: bool = False,
    default_style: str | None = None,
) -> str:
    """
    Render a single multiple-choice question to LaTeX.

    - show_id: if True, prefix the first text stem block with "(ID) ".
    - highlight_correct: if True, visually mark the correct answer.
    - scramble_choices: if True, randomly reassign the choice contents to
      the key labels; the correct key written to the output follows the
      correct answer to its new label.
    - default_style: listings style name, or None for verbatim mode.

    The question dict *q* is not modified.  Returns the TeX for one
    ``mcq`` environment.
    """
    qid = q.get("id", "")
    points = q.get("points", 1)
    correct_key = q.get("correct", "").strip()

    # Work on copies so the caller's choice dicts are never mutated.
    choices = [dict(choice) for choice in q.get("choices", [])]
    if scramble_choices:
        choices, correct_key = _shuffled_choices(choices, correct_key)

    # Make a shallow copy of stem blocks so we don't mutate the original
    stem_blocks = [dict(block) for block in q.get("stem", [])]

    if show_id:
        # Prepend "(ID)" to the *first* text block
        for block in stem_blocks:
            if block.get("type") == "text":
                block["text"] = f"({qid}) " + block.get("text", "")
                break

    # Render stem
    stem_lines: list[str] = []
    for block in stem_blocks:
        stem_lines.append(render_stem_block(block, default_style=default_style))
        stem_lines.append("")  # blank line between blocks
    stem_tex = "\n".join(stem_lines).rstrip()

    # Render choices
    ncols = _choices_columns(choices)
    choice_lines: list[str] = [rf"\begin{{choices}}[{ncols}]"]
    choice_lines.extend(
        render_choice(
            choice,
            correct_key=correct_key,
            highlight_correct=highlight_correct,
            default_style=default_style,
        )
        for choice in choices
    )
    choice_lines.append(r"\end{choices}")
    choices_tex = "\n".join(choice_lines)

    # If the stem ends with a code block, cancel its belowskip so the
    # choices don't have excess space above them.
    if stem_blocks and stem_blocks[-1].get("type") == "code":
        choices_tex = r"\vspace{-\medskipamount}" + "\n" + choices_tex

    # Wrap in mcq environment; third arg is still the correct key
    mcq_lines = [
        rf"\begin{{mcq}}{{{qid}}}{{{points}}}{{{correct_key}}}",
        stem_tex,
        choices_tex,
        r"\end{mcq}",
    ]
    return "\n".join(mcq_lines)
[docs] def render_tf( q: dict, show_id: bool = False, highlight_correct: bool = False, default_style: str | None = None, ) -> str: """ Render a single true/false question to LaTeX. - show_id: if True, prefix the first text stem block with "(ID) ". - highlight_correct: if True, visually mark the correct answer. """ qid = q.get("id", "") points = q.get("points", 1) answer = q["answer"] if "answer" in q else q.get("correct") correct_key = "T" if answer else "F" topic = q.get("topic", "") # Make a shallow copy of stem blocks so we don't mutate the original stem_blocks = [dict(block) for block in q.get("stem", [])] if show_id: # Prepend "(ID)" to the *first* text block for block in stem_blocks: if block.get("type") == "text": orig = block.get("text", "") block["text"] = f"({qid}) " + orig break # Build the T/F prefix label if highlight_correct: if correct_key == "T": tf_label = r"\correctlabel{T}\,True\,/\,False\,(F)\enspace " else: tf_label = r"True\,(T)\,/\,\correctlabel{F}\,False\enspace " else: tf_label = r"\textbf{True\,(T)\,/\,False\,(F)}\enspace " # Render stem, prepending the T/F label stem_lines: list[str] = [] for i, block in enumerate(stem_blocks): rendered = render_stem_block(block, default_style=default_style) if i == 0 and block.get("type") == "text": rendered = tf_label + rendered stem_lines.append(rendered) stem_lines.append("") # blank line between blocks stem_tex = "\n".join(stem_lines).rstrip() # Wrap in tf environment; third arg is still the correct key tf_lines = [ rf"\begin{{tf}}{{{qid}}}{{{points}}}{{{correct_key}}}", stem_tex, r"\end{tf}", ] return "\n".join(tf_lines)