# Source code for pp_highlighting.pp_highlighter

"""Syntax highlighting for prompt_toolkit and HTML with pyparsing."""

import html
import warnings

from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import (FormattedText, PygmentsTokens,
                                           split_lines, to_formatted_text)
from prompt_toolkit.lexers import Lexer
from pygments.token import STANDARD_TYPES, Token
import pyparsing as pp

__all__ = ['dummy_styler', 'PPHighlighter']


class Styler(pp.ParserElement):
    """Parser element that delegates matching to a wrapped expression and
    records the raw (untokenized) text it matched as a ``(style, text)``
    prompt_toolkit fragment.

    The fragment is written into the shared `fragments` mapping, keyed by the
    location in the input string at which the match started.
    """
    def __init__(self, fragments, style, expr):
        super().__init__()
        self.style = style
        self._fragments = fragments
        # Bare strings are promoted to a parser element, everything else is
        # used as given.
        self.expr = (
            self._literalStringClass(expr) if isinstance(expr, str) else expr
        )

    def parseImpl(self, instring, loc, doActions=True):
        # The trailing positional False is presumably `callPreParse`
        # (TODO confirm against the installed pyparsing version).
        # pylint: disable=protected-access
        stop, tokens = self.expr._parse(instring, loc, doActions, False)
        matched_text = instring[loc:stop]
        # A later match starting at the same location replaces this entry.
        self._fragments[loc] = (self.style, matched_text)
        return stop, tokens

    def __str__(self):
        return str(self.expr)


class DummyStyler:
    """Stand-in for :meth:`PPHighlighter.styler` that neither captures text nor
    applies a style: calling it just hands back a copy of the parse expression
    it was given. The instance is falsy (:code:`bool(dummy_styler)` is
    `False`) so that a parser factory can cheaply check whether it was handed
    :func:`dummy_styler` instead of a real styler.

    Args:
        style (Union[str, pygments.token.Token]): Ignored.
        expr (Union[str, pyparsing.ParserElement]): Copied, unless it is a
            string literal, in which case it will be wrapped by
            :attr:`pyparsing.ParserElement._literalStringClass` (default
            :class:`pyparsing.Literal`).

    Returns:
        pyparsing.ParserElement: A copy of the input parser element.
    """
    def __call__(self, style, expr):
        # Anything that is not a plain string is expected to provide copy().
        if not isinstance(expr, str):
            return expr.copy()
        # pylint: disable=protected-access
        literal_class = pp.ParserElement._literalStringClass
        return literal_class(expr)

    def __bool__(self):
        # Always falsy, so ``if styler:`` distinguishes it from a real styler.
        return False

    def __repr__(self):
        text = '<{.__module__}.dummy_styler(style, expr)>'.format(self)
        return text

dummy_styler = DummyStyler()


class PPHighlighter(Lexer):
    """Syntax highlighting for prompt_toolkit and HTML with pyparsing.

    This class can be used to highlight text via its :meth:`highlight` method
    (for :func:`prompt_toolkit.print_formatted_text`—see `the prompt_toolkit
    documentation
    <https://python-prompt-toolkit.readthedocs.io/en/stable/pages/printing_text.html#>`_
    for details), its :meth:`highlight_html` method, and by passing it as the
    `lexer` argument to a :class:`prompt_toolkit.PromptSession`.
    """

    def __init__(self, parser_factory, *, pygments_styles=False):
        """Constructs a new :class:`PPHighlighter`.

        You should supply a parser factory, a function that takes one argument
        and returns a parse expression. :class:`PPHighlighter` will pass its
        :meth:`styler` method as the argument (see :meth:`styler` for more
        details). :meth:`styler` modifies parse expressions to capture and
        style the text they match. The `style` argument to :meth:`styler` can
        be either a prompt_toolkit style string or a Pygments token.

        Examples:

            >>> def parser_factory(styler):
            ...     a = styler('class:int', ppc.integer)
            ...     return pp.delimitedList(a)
            >>> pph = PPHighlighter(parser_factory)
            >>> pph.highlight('1, 2, 3')
            FormattedText([('class:int', '1'), ('', ', '), ('class:int', '2'), ('', ', '), ('class:int', '3')])

        :class:`FormattedText` instances can be passed to
        :func:`prompt_toolkit.print_formatted_text`.

        Args:
            parser_factory (Callable[[Callable], pyparsing.ParserElement]):
                The parser factory.
            pygments_styles (bool): Whether or not the parser is styled using
                Pygments tokens.
        """
        # Shared with every Styler created by styler(); maps the start
        # location of a match to its (style, text) fragment.
        self._fragments = {}
        self._pygments_styles = pygments_styles
        self._parser = parser_factory(self.styler)
        self._parser.parseWithTabs()

    def __repr__(self):
        """Returns ``ClassName(<repr of the wrapped parser>)``."""
        return '{0.__class__.__name__}({0._parser!r})'.format(self)

    def styler(self, style, expr):
        """Wraps a pyparsing parse expression to capture text fragments.

        :meth:`styler` wraps the given parse expression, capturing the
        original text it matched, and returns the modified parse expression.
        The `style` argument can be either a prompt_toolkit style string or a
        Pygments token.

        Args:
            style (Union[str, pygments.token.Token]): The style to set for
                this text fragment, as a string or a Pygments token.
            expr (Union[str, pyparsing.ParserElement]): The pyparsing parser
                to wrap. If a literal string is specified, it will be wrapped
                by :attr:`pyparsing.ParserElement._literalStringClass`
                (default :class:`pyparsing.Literal`).

        Returns:
            pyparsing.ParserElement: The wrapped parser.
        """
        return Styler(self._fragments, style, expr)

    def _scan_string(self, s):
        """Runs the parser over the input string, capturing styled text.

        Adapted from :meth:`pyparsing.ParserElement.scanString` for custom
        exception handling: pyparsing parse errors are skipped silently, any
        other exception raised while parsing is reported as a
        :class:`RuntimeWarning` and scanning continues one character further
        on. An exception raised before the first pre-parse has completed is
        re-raised.
        """
        if not self._parser.streamlined:
            self._parser.streamline()
        for e in self._parser.ignoreExprs:
            e.streamline()

        loc = 0
        preloc = None
        pp.ParserElement.resetCache()
        while loc <= len(s):
            try:
                preloc = self._parser.preParse(s, loc)
                # pylint: disable=protected-access
                nextloc, _ = self._parser._parse(s, preloc, callPreParse=False)
            except Exception as err:  # pylint: disable=broad-except
                if preloc is None:
                    raise
                loc = preloc + 1
                if not isinstance(err, pp.ParseBaseException):
                    msg = 'Exception during parsing: {0.__class__.__name__}: {0}'
                    warnings.warn(msg.format(err), RuntimeWarning)
            else:
                # Always make progress, even after a zero-length match.
                loc = nextloc if nextloc > loc else preloc + 1

    def _highlight(self, s):
        """Gathers captured styled text and intervening unstyled text into a
        :class:`FormattedText` instance.

        Args:
            s (str): The input string.

        Returns:
            FormattedText: Fragments that cover `s` from start to end; text
            not captured by any styler gets the default style
            (``Token.Text`` when using Pygments styles, otherwise ``''``).

        Raises:
            TypeError: If `s` is not a :class:`str`.
        """
        if not isinstance(s, str):
            msg = 'Cannot highlight type {}, only str.'
            raise TypeError(msg.format(type(s).__name__))
        default_style = Token.Text if self._pygments_styles else ''

        self._fragments.clear()
        self._scan_string(s)

        # A styled expression that matched the empty string (e.g. an
        # Optional that did not match) leaves a zero-length capture behind.
        # Such a capture can never advance `loc` and would make the loop
        # below spin forever, so only non-empty captures are considered.
        styled = {
            start: fragment
            for start, fragment in self._fragments.items()
            if fragment[1]
        }
        locs = sorted(styled)
        locs.append(len(s))  # sentinel: unstyled text runs up to the end

        i = 0
        loc = 0
        fragments = FormattedText()
        while loc < len(s):
            fragment = styled.get(loc)
            if fragment:
                fragments.append(fragment)
                loc += len(fragment[1])
                # Skip captures that started inside the fragment just
                # emitted (e.g. from nested stylers).
                while locs[i] < loc:
                    i += 1
            else:
                # Unstyled run up to the next capture (or the end of `s`).
                fragments.append((default_style, s[loc:locs[i]]))
                loc = locs[i]
        return fragments

    def highlight(self, s):
        """Highlights a string, returning a list of fragments suitable for
        :func:`prompt_toolkit.print_formatted_text`.

        Args:
            s (str): The input string.

        Returns:
            FormattedText: The resulting list of prompt_toolkit text fragments.
        """
        fragments = self._highlight(s)
        if self._pygments_styles:
            return to_formatted_text(PygmentsTokens(fragments))
        return fragments

    def print(self, *values, **kwargs):
        """::

            print(*values, sep=' ', end='\\n', file=None, flush=False,
                  style=None, output=None, color_depth=None,
                  style_transformation=None,
                  include_default_pygments_style=None)

        Highlights and prints the values to a stream, or to `sys.stdout` by
        default. It calls :func:`prompt_toolkit.print_formatted_text`
        internally and takes the same keyword arguments as it (compatible
        with the builtin :func:`print`).
        """
        print_formatted_text(*map(self.highlight, map(str, values)), **kwargs)

    @classmethod
    def _pygments_css_class(cls, token):
        """Returns the standard CSS class name for a Pygments token.

        Tokens without an entry in ``STANDARD_TYPES`` fall back to the class
        name of their parent token.
        """
        try:
            return STANDARD_TYPES[token]
        except KeyError:
            return cls._pygments_css_class(token.parent)

    def highlight_html(self, s):
        """Highlights a string, returning HTML.

        Only CSS class names are currently supported. Parts of the style
        string that do not begin with ``class:`` will be ignored. If there
        are dots in the class name, they will be turned into hyphens.

        Args:
            s (str): The input string.

        Returns:
            str: The generated HTML.
        """
        fragments = self._highlight(s)
        tags = ['<span class="highlight">']
        template = '<span class="{}">{}</span>'
        table = str.maketrans({'.': '-'})
        for style, text in fragments:
            classes = []
            if self._pygments_styles:
                classes.append(self._pygments_css_class(style))
            else:
                for st in style.split():
                    if st.startswith('class:'):
                        classes.append(html.escape(st[6:].translate(table)))
            # Text whose first class name is missing or empty is emitted
            # without a wrapping <span>.
            if classes and classes[0]:
                tags.append(template.format(' '.join(classes),
                                            html.escape(text)))
            else:
                tags.append(html.escape(text))
        tags.append('</span>')
        return ''.join(tags)

    def lex_document(self, document):
        """Highlights the text of a prompt_toolkit document.

        Args:
            document: The document to highlight; its ``text`` attribute is
                read.

        Returns:
            Callable[[int], list]: A function mapping a line index to the
            list of text fragments for that line.
        """
        lines = list(split_lines(self.highlight(document.text)))
        return lambda i: lines[i]