r"""Doctests for PEP 292's string template functions

First, it is now a function and accepts either mappings or keyword arguments:

>>> simplesub('the $xxx and', {'xxx':10})
'the 10 and'
>>> simplesub('the $xxx and', xxx='10')
'the 10 and'


Next, it makes sure the return type is a str if all the inputs are a str.  Any
unicode components will cause a unicode output.  This matches the behavior of
other re and string ops:

>>> simplesub('the $xxx and', xxx='10')
'the 10 and'
>>> simplesub(u'the $xxx and', xxx='10')
u'the 10 and'
>>> simplesub('the $xxx and', xxx=u'10')
u'the 10 and'
>>> simplesub(u'the $xxx and', xxx=u'10')
u'the 10 and'


Non-strings are coerced to the type of the template:

>>> simplesub('the $xxx and', xxx=10)
'the 10 and'
>>> simplesub(u'the $xxx and', xxx=10)
u'the 10 and'


The ValueErrors are now more specific.  They include the line number and the
mismatched token:

>>> t = '''line one
... line two
... the $@malformed token
... line four'''
>>> simplesub(t, {})
Traceback (most recent call last):
 . . .
ValueError: Invalid placeholder on line 3:  '@malformed'


Also, the re pattern was changed just a bit to catch an important class of
language-specific errors where a user may use a non-ASCII identifier.  The
previous implementation would match up to the first non-ASCII character and
then (hopefully) raise a KeyError for the abbreviated identifier.  Now, it
raises a ValueError highlighting the problem identifier.  Note, we still
only accept Python identifiers but have improved error detection:

>>> t = u'Returning $ma\u00F1ana or later.'
>>> simplesub(t, {})
Traceback (most recent call last):
 . . .
ValueError: Invalid placeholder on line 1:  u'ma\xf1ana'


Exercise safe substitution:

>>> safesimplesub('$$ $name ${rank}', name='Guido', rank='BDFL')
'$ Guido BDFL'
>>> safesimplesub('$$ $name ${rank}')
'$ $name ${rank}'
>>> safesimplesub('$$ $@malformed ${rank}')
Traceback (most recent call last):
 . . .
ValueError: Invalid placeholder on line 1:  '@malformed'

"""



import re as _re

# Search for $$, $identifier, ${identifier}, and any bare $'s.
# The four capturing groups are, in order: escaped, named, braced and
# catchall; the substitution functions unpack mo.groups() in that order.
_pattern = _re.compile(r"""
  \$(\$)|                       # Escape sequence of two $ signs
  \$([_a-z][_a-z0-9]*(?!\w))|   # $ and a Python identifier
  \${([_a-z][_a-z0-9]*)}|       # $ and a brace delimited identifier
  \$(\S*)                       # Catchall for ill-formed $ expressions
""", _re.IGNORECASE | _re.VERBOSE | _re.UNICODE)
# Pattern notes:
#
# The pattern for $identifier includes a negative lookahead assertion
# to make sure that the identifier is not followed by a Unicode
# alphanumeric character other than [_a-z0-9].  The idea is to make sure
# not to partially match ill-formed identifiers containing characters
# from other alphabets.  Without the assertion, the Spanish word for
# tomorrow "ma~nana" (where ~n is 0xF1) would improperly match on "ma",
# much to the surprise of the end-user (possibly a non-programmer).
#
# The catchall pattern has to come last because it captures non-space
# characters after a dollar sign not matched by a previous group.  Those
# captured characters make the error messages more informative.
#
# The substitution functions rely on the first three patterns matching
# with a non-empty string.  If that changes, then change lines like
# "if named" to "if named is not None".

# The module alias is not used below this point; remove the name from
# the module namespace.
del _re

def simplesub(template, mapping=None, **kwds):
    """Return template with its $-placeholders replaced by mapping values.

    Recognized forms are ``$$`` (a literal ``$``), ``$identifier`` and
    ``${identifier}``.

    template -- the string to perform substitutions on.
    mapping  -- object indexed by placeholder name.  When it is None the
                keyword arguments are used instead; keyword arguments are
                ignored whenever a mapping is passed.

    Each looked-up value is formatted with '%s'.  A placeholder missing
    from the mapping propagates the mapping's lookup error (KeyError for
    a dict).  Any other text following a ``$`` raises ValueError naming
    the line number and the offending text.
    """
    if mapping is None:
        mapping = kwds
    def convert(mo):
        escaped, named, braced, catchall = mo.groups()
        if named or braced:
            # Wrap the value in a 1-tuple: a bare tuple on the right of '%'
            # would be unpacked as the argument list instead of being
            # formatted as a single object.
            return '%s' % (mapping[named or braced],)
        elif escaped:
            return '$'
        # Only the catchall group can have matched here.  The slice ends
        # just after the offending '$', so the number of lines in it is
        # the 1-based line number of the placeholder.
        lineno = len(template[:mo.start(4)].splitlines())
        raise ValueError('Invalid placeholder on line %d:  %r' %
                         (lineno, catchall))
    return _pattern.sub(convert, template)

def safesimplesub(template, mapping=None, **kwds):
    """A function for $-substitutions.

    This function is 'safe' in the sense that you will never get KeyErrors if
    there are placeholders missing from the interpolation dictionary.  In that
    case, you will get the original placeholder in the value string.

    template -- the string to perform substitutions on.
    mapping  -- object indexed by placeholder name.  When it is None the
                keyword arguments are used instead; keyword arguments are
                ignored whenever a mapping is passed.

    ``$$`` becomes a literal ``$``; ``$identifier`` and ``${identifier}``
    are replaced by the '%s'-formatted value when the lookup succeeds.
    Any other text following a ``$`` still raises ValueError naming the
    line number and the offending text.
    """
    if mapping is None:
        mapping = kwds
    def convert(mo):
        escaped, named, braced, catchall = mo.groups()
        if named or braced:
            # Guard only the lookup, so that a KeyError raised while
            # formatting the value is not mistaken for a missing name.
            try:
                value = mapping[named or braced]
            except KeyError:
                # Leave the placeholder exactly as written; the whole match
                # is '$name' or '${name}' respectively.
                return mo.group(0)
            # Wrap the value in a 1-tuple: a bare tuple on the right of '%'
            # would be unpacked as the argument list instead of being
            # formatted as a single object.
            return '%s' % (value,)
        elif escaped:
            return '$'
        # Only the catchall group can have matched here.  The slice ends
        # just after the offending '$', so the number of lines in it is
        # the 1-based line number of the placeholder.
        lineno = len(template[:mo.start(4)].splitlines())
        raise ValueError('Invalid placeholder on line %d:  %r' %
                         (lineno, catchall))
    return _pattern.sub(convert, template)

# When executed as a script, run this module's doctests and print the
# value returned by doctest.testmod().
if __name__ == '__main__':
    import doctest
    print 'Doctest results: ', doctest.testmod()
