# # doctest.py: Syntax Highlighting for doctest blocks # Edward Loper # # Created [06/28/03 02:52 AM] # $Id: restructuredtext.py 1210 2006-04-10 13:25:50Z edloper $ # """ Syntax highlighting for doctest blocks. This module defines two functions, L{doctest_to_html()} and L{doctest_to_latex()}, which can be used to perform syntax highlighting on doctest blocks. It also defines the more general C{colorize_doctest()}, which could be used to do syntac highlighting on doctest blocks with other output formats. (Both C{doctest_to_html()} and C{doctest_to_latex()} are defined using C{colorize_doctest()}.) """ __docformat__ = 'epytext en' import re from epydoc.util import plaintext_to_html, plaintext_to_latex __all__ = ['doctest_to_html', 'doctest_to_latex', 'DoctestColorizer', 'XMLDoctestColorizer', 'HTMLDoctestColorizer', 'LaTeXDoctestColorizer'] def doctest_to_html(s): """ Perform syntax highlighting on the given doctest string, and return the resulting HTML code. This code consists of a C{
}
    block with class=py-doctest.  Syntax highlighting is performed
    using the following css classes:
    
      - C{py-prompt} -- the Python PS1 prompt (>>>)
      - C{py-more} -- the Python PS2 prompt (...)
      - C{py-keyword} -- a Python keyword (for, if, etc.)
      - C{py-builtin} -- a Python builtin name (abs, dir, etc.)
      - C{py-string} -- a string literal
      - C{py-comment} -- a comment
      - C{py-except} -- an exception traceback (up to the next >>>)
      - C{py-output} -- the output from a doctest block.
      - C{py-defname} -- the name of a function or class defined by
        a C{def} or C{class} statement.
    """
    return HTMLDoctestColorizer().colorize_doctest(s)

def doctest_to_latex(s):
    """
    Perform syntax highlighting on the given doctest string, and
    return the resulting LaTeX code.  This code consists of an
    C{alltt} environment.  Syntax highlighting is performed using 
    the following new latex commands, which must be defined externally:
      - C{\pysrcprompt} -- the Python PS1 prompt (>>>)
      - C{\pysrcmore} -- the Python PS2 prompt (...)
      - C{\pysrckeyword} -- a Python keyword (for, if, etc.)
      - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
      - C{\pysrcstring} -- a string literal
      - C{\pysrccomment} -- a comment
      - C{\pysrcexcept} -- an exception traceback (up to the next >>>)
      - C{\pysrcoutput} -- the output from a doctest block.
      - C{\pysrcdefname} -- the name of a function or class defined by
        a C{def} or C{class} statement.
    """
    return LaTeXDoctestColorizer().colorize_doctest(s)

class DoctestColorizer:
    """
    An abstract base class for performing syntax highlighting on
    doctest blocks and other bits of Python code.  Subclasses should
    provide definitions for:

      - The L{markup()} method, which takes a substring and a tag, and
        returns a colorized version of the substring.
      - The L{PREFIX} and L{SUFFIX} variables, which will be added
        to the beginning and end of the strings returned by
        L{colorize_codeblock} and L{colorize_doctest}.  
    """

    #: A string that is added to the beginning of the strings
    #: returned by L{colorize_codeblock} and L{colorize_doctest}.
    #: Typically, this string begins a preformatted area.
    PREFIX = None

    #: A string that is added to the end of the strings
    #: returned by L{colorize_codeblock} and L{colorize_doctest}.
    #: Typically, this string ends a preformatted area.
    SUFFIX = None

    #: A list of the names of all Python keywords.  ('as' is included
    #: even though it is technically not a keyword.)
    _KEYWORDS = ("and       del       for       is        raise"
                 "assert    elif      from      lambda    return"
                 "break     else      global    not       try"
                 "class     except    if        or        while"
                 "continue  exec      import    pass      yield"
                 "def       finally   in        print     as").split()

    #: A list of all Python builtins.
    _BUILTINS = [_BI for _BI in dir(__builtins__)
                 if not _BI.startswith('__')]

    #: A regexp group that matches keywords.
    _KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])

    #: A regexp group that matches Python builtins.
    _BUILTIN_GRP = (r'(?>>" prompts.
    _PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'
    
    #: A regexp group that matches Python "..." prompts.
    _PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'

    #: A regexp group that matches function and class definitions.
    _DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'

    #: A regexp that matches Python prompts
    PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
                           re.MULTILINE | re.DOTALL)

    #: A regexp that matches Python "..." prompts.
    PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
                            re.MULTILINE | re.DOTALL)

    #: A regexp that matches doctest exception blocks.
    EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
                           re.DOTALL | re.MULTILINE)

    #: A regexp that matches doctest directives.
    DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')

    #: A regexp that matches all of the regions of a doctest block
    #: that should be colored.
    DOCTEST_RE = re.compile(
        r'(.*?)((?P%s)|(?P%s)|(?P%s)|'
              r'(?P%s)|(?P%s)|'
              r'(?P%s)|(?P%s)|(?P\Z))' % (
        _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP, _BUILTIN_GRP,
        _PROMPT1_GRP, _PROMPT2_GRP), re.MULTILINE | re.DOTALL)

    #: This regular expression is used to find doctest examples in a
    #: string.  This is copied from the standard Python doctest.py
    #: module (after the refactoring in Python 2.4+).
    DOCTEST_EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P
            (?:^(?P [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*   # PS2 lines
          \n?)
        # Want consists of any non-blank lines that do not start with PS1.
        (?P (?:(?![ ]*$)    # Not a blank line
                     (?![ ]*>>>)  # Not a line starting with PS1
                     .*$\n?       # But any other line
                  )*)
        ''', re.MULTILINE | re.VERBOSE)

    def colorize_inline(self, s):
        """
        Colorize a string containing Python code.  Do not add the
        L{PREFIX} and L{SUFFIX} strings to the returned value.  This
        method is intended for generating syntax-highlighted strings
        that are appropriate for inclusion as inline expressions.
        """
        return self.DOCTEST_RE.sub(self.subfunc, s)

    def colorize_codeblock(self, s):
        """
        Colorize a string containing only Python code.  This method
        differs from L{colorize_doctest} in that it will not search
        for doctest prompts when deciding how to colorize the string.
        """
        body = self.DOCTEST_RE.sub(self.subfunc, s)
        return self.PREFIX + body + self.SUFFIX

    def colorize_doctest(self, s, strip_directives=False):
        """
        Colorize a string containing one or more doctest examples.
        """
        output = []
        charno = 0
        for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
            # Parse the doctest example:
            pysrc, want = m.group('source', 'want')
            # Pre-example text:
            output.append(s[charno:m.start()])
            # Example source code:
            output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))
            # Example output:
            if want:
                if self.EXCEPT_RE.match(want):
                    output += '\n'.join([self.markup(line, 'except')
                                         for line in want.split('\n')])
                else:
                    output += '\n'.join([self.markup(line, 'output')
                                         for line in want.split('\n')])
            # Update charno
            charno = m.end()
        # Add any remaining post-example text.
        output.append(s[charno:])
        
        return self.PREFIX + ''.join(output) + self.SUFFIX
    
    def subfunc(self, match):
        other, text = match.group(1, 2)
        #print 'M %20r %20r' % (other, text) # <- for debugging
        if other:
            other = '\n'.join([self.markup(line, 'other')
                               for line in other.split('\n')])
            
        if match.group('PROMPT1'):
            return other + self.markup(text, 'prompt')
        elif match.group('PROMPT2'):
            return other + self.markup(text, 'more')
        elif match.group('KEYWORD'):
            return other + self.markup(text, 'keyword')
        elif match.group('BUILTIN'):
            return other + self.markup(text, 'builtin')
        elif match.group('COMMENT'):
            return other + self.markup(text, 'comment')
        elif match.group('STRING') and '\n' not in text:
            return other + self.markup(text, 'string')
        elif match.group('STRING'):
            # It's a multiline string; colorize the string & prompt
            # portion of each line.
            pieces = []
            for line in text.split('\n'):
                if self.PROMPT2_RE.match(line):
                    if len(line) > 4:
                        pieces.append(self.markup(line[:4], 'more') +
                                      self.markup(line[4:], 'string'))
                    else:
                        pieces.append(self.markup(line[:4], 'more'))
                elif line:
                    pieces.append(self.markup(line, 'string'))
                else:
                    pieces.append('')
            return other + '\n'.join(pieces)
        elif match.group('DEFINE'):
            m = re.match('(?P\w+)(?P\s+)(?P\w+)', text)
            return other + (self.markup(m.group('def'), 'keyword') +
                        self.markup(m.group('space'), 'other') +
                        self.markup(m.group('name'), 'defname'))
        elif match.group('EOS') is not None:
            return other
        else:
            assert 0, 'Unexpected match!'

    def markup(self, s, tag):
        """
        Apply syntax highlighting to a single substring from a doctest
        block.  C{s} is the substring, and C{tag} is the tag that
        should be applied to the substring.  C{tag} will be one of the
        following strings:
        
          - C{prompt} -- the Python PS1 prompt (>>>)
          - C{more} -- the Python PS2 prompt (...)
          - C{keyword} -- a Python keyword (for, if, etc.)
          - C{builtin} -- a Python builtin name (abs, dir, etc.)
          - C{string} -- a string literal
          - C{comment} -- a comment
          - C{except} -- an exception traceback (up to the next >>>)
          - C{output} -- the output from a doctest block.
          - C{defname} -- the name of a function or class defined by
            a C{def} or C{class} statement.
          - C{other} -- anything else (does *not* include output.)
        """
        raise AssertionError("Abstract method")

class XMLDoctestColorizer(DoctestColorizer):
    """
    A subclass of DoctestColorizer that generates XML-like output.
    This class is mainly intended to be used for testing purposes.
    """
    PREFIX = '\n'
    SUFFIX = '\n'
    def markup(self, s, tag):
        s = s.replace('&', '&').replace('<', '<').replace('>', '>')
        if tag == 'other': return s
        else: return '<%s>%s' % (tag, s, tag)

class HTMLDoctestColorizer(DoctestColorizer):
    """A subclass of DoctestColorizer that generates HTML output."""
    PREFIX = '
\n'
    SUFFIX = '
\n' def markup(self, s, tag): if tag == 'other': return plaintext_to_html(s) else: return ('%s' % (tag, plaintext_to_html(s))) class LaTeXDoctestColorizer(DoctestColorizer): """A subclass of DoctestColorizer that generates LaTeX output.""" PREFIX = '\\begin{alltt}\n' SUFFIX = '\\end{alltt}\n' def markup(self, s, tag): if tag == 'other': return plaintext_to_latex(s) else: return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s))