Source code for pyaota.generator.yaml2tex

"""
LaTeX rendering helpers for pyaota.
"""

import re

import logging

logger = logging.getLogger(__name__)

# ---------- Normalization helpers ----------


[docs]
# Single-character translation table: Word-style punctuation and Unicode
# superscript digits mapped to ASCII / TeX-friendly text.
_PUNCTUATION_TABLE = str.maketrans({
    "…": "...",
    "“": '"',
    "”": '"',
    "‘": "'",
    "’": "'",
    "–": "-",
    "—": "--",
    "\u2011": "-",  # non-breaking hyphen
    # Superscript digits
    "\u00b9": r"\textsuperscript{1}",
    "\u00b2": r"\textsuperscript{2}",
    "\u00b3": r"\textsuperscript{3}",
    "\u2070": r"\textsuperscript{0}",
    "\u2074": r"\textsuperscript{4}",
    "\u2075": r"\textsuperscript{5}",
    "\u2076": r"\textsuperscript{6}",
    "\u2077": r"\textsuperscript{7}",
    "\u2078": r"\textsuperscript{8}",
    "\u2079": r"\textsuperscript{9}",
})


def normalize_punctuation(s: str) -> str:
    """Return *s* with Word-style Unicode punctuation made ASCII/TeX-friendly.

    Curly quotes, dashes, the ellipsis character and the non-breaking hyphen
    become plain ASCII; Unicode superscript digits become
    ``\\textsuperscript{<digit>}``. All other characters are left as they are.
    """
    return s.translate(_PUNCTUATION_TABLE)



[docs]
def tex_escape(s: str) -> str:
    """Normalize Unicode punctuation in *s*, then escape it for LaTeX text."""
    return tex_escape_plain(normalize_punctuation(s))



[docs]
# Underscore runs are listed before the single-character class so that a run
# is consumed whole; this removes the need for textual placeholders, which
# could collide with input that happens to contain the placeholder text.
_PLAIN_ESCAPE_RE = re.compile(r'_{4,}|_{2,}|[\\&%$#_{}~^]')

_PLAIN_ESCAPES = {
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
}


def _plain_escape_token(match) -> str:
    """Return the LaTeX replacement for one token matched by _PLAIN_ESCAPE_RE."""
    token = match.group(0)
    # Only underscore runs can be longer than one character.
    if len(token) >= 4:
        return r'\blank{}'
    if len(token) >= 2:
        return r'\smallblank{}'
    return _PLAIN_ESCAPES[token]


def tex_escape_plain(s: str) -> str:
    """
    Escape LaTeX special characters in *normal text* context.

    - Runs of 4+ underscores become \\blank{}.
    - Runs of 2-3 underscores become \\smallblank{}.
    - A single underscore and the characters \\ & % $ # { } ~ ^ are replaced
      by their text-mode escapes.

    The conversion is done in one pass over *s*, so literal text such as
    "<<BLANK>>" in the input is emitted unchanged.
    """
    return _PLAIN_ESCAPE_RE.sub(_plain_escape_token, s)



[docs]
def tex_escape_texttt(s: str) -> str:
    """
    Escape *s* for use as the argument of \\texttt{...}.

    \\texttt typesets its argument as ordinary LaTeX, so every special
    character -- including the backslash itself -- is replaced by its
    text-mode escape. No punctuation normalization is applied here.
    """
    table = str.maketrans({
        '\\': r'\textbackslash{}',
        '$': r'\$',
        '%': r'\%',
        '#': r'\#',
        '&': r'\&',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })
    return s.translate(table)




[docs]
def tex_escape_inline_code(s: str) -> str:
    """
    Escape content that will go inside \\inl{...}.

    - normalize punctuation
    - escape ^, %, _, {, } by prefixing each with a backslash

    Escaping is per character, so a doubled caret "^^" comes out as
    "\\^\\^" without needing a dedicated two-character rule.
    """
    s = normalize_punctuation(s)

    # Per-character lookup table; multi-character keys could never match here.
    escapes = {
        '^': r'\^',
        '%': r'\%',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
    }
    return ''.join(escapes.get(ch, ch) for ch in s)


# ---------- Render helpers ----------


[docs]
def render_text(s: str, default_style: str | None = None) -> str:
    """
    Convert text with ````inline code```` and ``@@inline math@@`` markers
    to LaTeX.

    - Punctuation is normalized over the whole string first.
    - ``@@math@@`` regions are emitted verbatim as ``$...$``.
    - Double-backtick code regions become \\inl{...} when *default_style* is
      truthy, otherwise \\texttt{...}; \\verb is not used because it cannot
      appear inside command arguments such as \\choice{...}.
    - All remaining text is escaped for normal text mode.
    """
    s = normalize_punctuation(s)
    # One alternation handles both marker kinds in a single left-to-right scan.
    marker = re.compile(r'@@(.*?)@@|``(.*?)``')

    pieces = []
    cursor = 0
    for hit in marker.finditer(s):
        plain = s[cursor:hit.start()]
        if plain:
            pieces.append(tex_escape_plain(plain))

        math_src, code_src = hit.groups()
        if math_src is not None:
            # Math is passed through raw.
            pieces.append("$" + math_src + "$")
        elif default_style:
            pieces.append("\\inl{" + tex_escape_inline_code(code_src) + "}")
        else:
            pieces.append("\\texttt{" + tex_escape_texttt(code_src) + "}")

        cursor = hit.end()

    remainder = s[cursor:]
    if remainder:
        pieces.append(tex_escape_plain(remainder))

    return ''.join(pieces)


_VERB_DELIMITERS = '|@!~+'

def _choose_verb_delimiter(code: str) -> str | None:
    for c in _VERB_DELIMITERS:
        if c not in code:
            return c
    return None



[docs]
def render_code_block(text: str, style: str | None = None, force_env: bool = True) -> tuple[str, str]:
    """
    Render code as an ("inline" | "block", LaTeX) pair.

    Leading and trailing blank lines are dropped after punctuation
    normalization. What remains is rendered as:

      - an empty environment when nothing is left;
      - an inline snippet when exactly one line is left and *force_env* is
        False: \\inl{...} if *style* is truthy, else \\verb with the first
        free delimiter, else an escaped \\texttt{...}. Inline snippets start
        with a newline;
      - otherwise a block: \\begin{lstlisting}[style=...] if *style* is
        truthy, else \\begin{verbatim}.

    Caller decides how to embed the result.
    """
    rows = normalize_punctuation(text).splitlines()

    # Trim blank lines from both ends by narrowing an index window.
    first, last = 0, len(rows)
    while first < last and not rows[first].strip():
        first += 1
    while last > first and not rows[last - 1].strip():
        last -= 1
    rows = rows[first:last]

    if style:
        env_open = f"\\begin{{lstlisting}}[style={style}]\n"
        env_close = "\\end{lstlisting}"
    else:
        env_open = "\\begin{verbatim}\n"
        env_close = "\\end{verbatim}"

    if not rows:
        return "block", env_open + env_close

    if not force_env and len(rows) == 1:
        only = rows[0]
        if style:
            return "inline", "\n\\inl{" + tex_escape_inline_code(only) + "}"
        delim = _choose_verb_delimiter(only)
        if delim:
            return "inline", "\n\\verb" + delim + only + delim
        # Every candidate delimiter occurs in the code: escape it instead.
        return "inline", "\n\\texttt{" + tex_escape_texttt(only) + "}"

    return "block", env_open + "\n".join(rows) + "\n" + env_close




[docs]
def render_stem_block(block: dict, default_style: str | None = None) -> str:
    """
    Render a single stem block (text, code, or image) to LaTeX.
    """
    btype = block.get("type", "text")
    if btype == "text":
        txt = block.get("text", "")
        return render_text(txt, default_style=default_style)
    elif btype == "code":
        text = block.get("text", "")
        # Only use the block's style override when listings mode is active;
        # in verbatim mode (default_style is None) always use None.
        style = block.get("style", default_style) if default_style is not None else None
        kind, tex = render_code_block(text, style)
        # For stems, just return whatever tex we got (inline or block)
        return tex
    elif btype == "image":
        src = block.get("src", "")
        width = block.get("width", "")
        # Convert pixel width to a fraction of linewidth (assume ~500px ≈ full width)
        lw_frac = 0.5
        if width:
            try:
                px = int(re.sub(r'[^0-9]', '', width))
                lw_frac = min(round(px / 500, 2), 1.0)
            except (ValueError, TypeError):
                pass
        return (
            f"\n\\begin{{center}}\n"
            f"\\includegraphics[width={lw_frac}\\linewidth]{{images/{src}}}\n"
            f"\\end{{center}}"
        )
    else:
        return f"% [unhandled stem block type: {btype}]"


def _visual_len(text: str) -> int:
    """Return the length of *text* with its inline markers removed.

    Double-backtick code markers are stripped first, then @@...@@ math
    markers; in both cases the enclosed content is kept.
    """
    unwrapped = text
    for marker_pattern in (r'``(.*?)``', r'@@(.*?)@@'):
        unwrapped = re.sub(marker_pattern, lambda hit: hit.group(1), unwrapped)
    return len(unwrapped)


def _choices_columns(choices: list[dict], max_text_len: int = 40, max_code_len: int = 28) -> int:
    """Pick the column count for a list of choices: 2 if every one fits, else 1.

    A code choice fits when its longest non-blank line is at most
    *max_code_len* characters (a code choice with no non-blank lines always
    fits). Any other choice fits when its marker-stripped length is at most
    *max_text_len*.
    """
    def fits(choice: dict) -> bool:
        raw = str(choice.get("text", ""))
        if choice.get("type", "text") == "code":
            widths = [len(row) for row in raw.splitlines() if row.strip()]
            return not widths or max(widths) <= max_code_len
        return _visual_len(raw) <= max_text_len

    return 2 if all(fits(choice) for choice in choices) else 1



[docs]
def render_choice(
    choice: dict,
    correct_key: str | None = None,
    highlight_correct: bool = False,
    default_style: str | None = None,
) -> str:
    """
    Render one choice dict as a \\choice entry.

    - The label is ``<key>.``, or \\correctlabel{<key>} when
      *highlight_correct* is set and the choice's key equals *correct_key*.
    - Text choices (the default type) put the rendered text inside the
      \\choice argument.
    - Code choices emit an empty \\choice body followed by the code on the
      next line, then a trailing blank line.
    """
    key = choice["key"]
    raw_text = str(choice.get("text", ""))
    ctype = choice.get("type", "text")
    # The per-choice style override only applies in listings mode.
    style = None if default_style is None else choice.get("style", default_style)

    mark_correct = highlight_correct and correct_key is not None and key == correct_key
    label = rf"\correctlabel{{{key}}}" if mark_correct else f"{key}."

    if ctype != "code":
        body = render_text(raw_text, default_style=default_style)
        return rf"  \choice[{label}]{{{body}}}"

    _kind, snippet = render_code_block(raw_text, style, force_env=False)
    # The code goes after an empty choice body rather than inside it:
    # \lstinline nested in a command argument is pre-tokenized, which breaks
    # its verbatim space handling (spaces inside string literals vanish).
    return "\n".join([
        rf"  \choice[{label}]{{}}",
        snippet.lstrip('\n'),
        "",  # blank line between code choices
    ])



[docs]
def render_question(
    q: dict,
    **kwargs) -> str:
    """
    Render a single question dict to LaTeX, dispatching
    to the appropriate renderer based on question type.

    - "mcq" (the default when "type" is missing): render_mcq(q, **kwargs)
    - "tf": render_tf(q, **kwargs) without ``scramble_choices``, which
      render_tf does not accept (a true/false question has no choice list)
    - any other type: a warning is logged and a LaTeX comment naming the
      type is returned, so the result is always a string
    """
    logger.debug(
        "Rendering question ID=%s type=%s kwargs=%s",
        q.get('id', ''), q.get('type', 'mcq'), kwargs,
    )
    qtype = q.get("type", "mcq").lower()
    if qtype == "mcq":
        return render_mcq(q, **kwargs)
    if qtype == "tf":
        # Callers typically pass one kwargs set for a whole mixed exam.
        tf_kwargs = {k: v for k, v in kwargs.items() if k != "scramble_choices"}
        return render_tf(q, **tf_kwargs)

    logger.warning("Unhandled question type %r for question ID=%s", qtype, q.get('id', ''))
    return f"% [unhandled question type: {qtype}]"



[docs]
def render_mcq(
    q: dict,
    show_id: bool = False,
    highlight_correct: bool = False,
    scramble_choices: bool = False,
    default_style: str | None = None,
) -> str:
    """
    Render a single multiple-choice question to LaTeX.

    - show_id: if True, prefix the first text stem block with "(ID) ".
    - highlight_correct: if True, visually mark the correct answer.
    - scramble_choices: if True, randomly permute the choice contents.
      The key labels stay in their original order; each choice's text,
      type and style move together, and the correct key is updated to the
      label the correct content ends up under.
    - default_style: listings style name, or None for verbatim mode.

    The input dict *q* and its nested dicts are not modified.
    """
    qid = q.get("id", "")
    points = q.get("points", 1)
    correct_key = q.get("correct", "").strip()

    # Shallow copies so neither scrambling nor rendering touches the input.
    choices = [dict(choice) for choice in q.get("choices", [])]

    if scramble_choices:
        import random

        # Position of the correct choice before shuffling (first match wins).
        correct_src = next(
            (i for i, choice in enumerate(choices) if choice.get("key", "") == correct_key),
            None,
        )
        order = list(range(len(choices)))
        random.shuffle(order)

        scrambled = []
        for slot, src in enumerate(order):
            # Move the whole payload so text stays with its type and style.
            moved = {k: v for k, v in choices[src].items() if k != "key"}
            if "key" in choices[slot]:
                moved["key"] = choices[slot]["key"]
            scrambled.append(moved)
            if src == correct_src:
                correct_key = choices[slot].get("key", correct_key)
        choices = scrambled

    # Make a shallow copy of stem blocks so we don't mutate the original
    stem_blocks = [dict(block) for block in q.get("stem", [])]

    if show_id:
        # Prepend "(ID)" to the *first* text block
        for block in stem_blocks:
            if block.get("type") == "text":
                block["text"] = f"({qid}) " + block.get("text", "")
                break

    # Render stem
    stem_lines: list[str] = []
    for block in stem_blocks:
        stem_lines.append(render_stem_block(block, default_style=default_style))
        stem_lines.append("")  # blank line between blocks
    stem_tex = "\n".join(stem_lines).rstrip()

    # Render choices
    ncols = _choices_columns(choices)
    choice_lines: list[str] = [rf"\begin{{choices}}[{ncols}]"]
    choice_lines.extend(
        render_choice(
            choice,
            correct_key=correct_key,
            highlight_correct=highlight_correct,
            default_style=default_style,
        )
        for choice in choices
    )
    choice_lines.append(r"\end{choices}")
    choices_tex = "\n".join(choice_lines)

    # If the stem ends with a code block, pull the choices up by a
    # \medskipamount so they don't have excess space above them.
    if stem_blocks and stem_blocks[-1].get("type") == "code":
        choices_tex = r"\vspace{-\medskipamount}" + "\n" + choices_tex

    # Wrap in mcq environment; third arg is still the correct key
    mcq_lines = [
        rf"\begin{{mcq}}{{{qid}}}{{{points}}}{{{correct_key}}}",
        stem_tex,
        choices_tex,
        r"\end{mcq}",
    ]
    return "\n".join(mcq_lines)



[docs]
def render_tf(
    q: dict,
    show_id: bool = False,
    highlight_correct: bool = False,
    default_style: str | None = None,
) -> str:
    """
    Render a single true/false question to LaTeX.

    - show_id: if True, prefix the first text stem block with "(ID) ".
    - highlight_correct: if True, visually mark the correct answer.
    """
    qid = q.get("id", "")
    points = q.get("points", 1)
    answer = q["answer"] if "answer" in q else q.get("correct")
    correct_key = "T" if answer else "F"
    topic = q.get("topic", "")

    # Make a shallow copy of stem blocks so we don't mutate the original
    stem_blocks = [dict(block) for block in q.get("stem", [])]

    if show_id:
        # Prepend "(ID)" to the *first* text block
        for block in stem_blocks:
            if block.get("type") == "text":
                orig = block.get("text", "")
                block["text"] = f"({qid}) " + orig
                break

    # Build the T/F prefix label
    if highlight_correct:
        if correct_key == "T":
            tf_label = r"\correctlabel{T}\,True\,/\,False\,(F)\enspace "
        else:
            tf_label = r"True\,(T)\,/\,\correctlabel{F}\,False\enspace "
    else:
        tf_label = r"\textbf{True\,(T)\,/\,False\,(F)}\enspace "

    # Render stem, prepending the T/F label
    stem_lines: list[str] = []
    for i, block in enumerate(stem_blocks):
        rendered = render_stem_block(block, default_style=default_style)
        if i == 0 and block.get("type") == "text":
            rendered = tf_label + rendered
        stem_lines.append(rendered)
        stem_lines.append("")  # blank line between blocks
    stem_tex = "\n".join(stem_lines).rstrip()

    # Wrap in tf environment; third arg is still the correct key
    tf_lines = [
        rf"\begin{{tf}}{{{qid}}}{{{points}}}{{{correct_key}}}",
        stem_tex,
        r"\end{tf}",
    ]
    return "\n".join(tf_lines)