#!/usr/bin/env python

from __future__ import with_statement

"""
Untabify C files, converting them from tab indent to 4-spaces indent.
Tries to take care of continuation lines, as well as vertical alignment.
Gives sufficiently good results on most files of the source tree; visual
inspection is still recommended to fix possible cosmetic breakage.

Notes:

- this script also removes trailing whitespace on processed files
- only whitespace is added/removed, therefore compilation can't be
  broken.
- only run on files really using tab indent; this shouldn't be run on files
  already using 4-spaces indent (use -l option to list files using tab indent)

Recipe for conversion of the whole source tree:

  $ untabify.py -b .
    # Revert copy of external lib
  $ svn revert -R Modules/_ctypes/libffi*
    # Revert files with mixed 4-spaces and tabs (they need separate handling)
  $ BADFILES="Modules/_hashopenssl.c Modules/_curses_panel.c Modules/_bsddb.c \
              Modules/unicodedata.c Modules/shamodule.c Modules/_hotshot.c \
              Modules/pyexpat.c Modules/parsermodule.c Modules/sha256module.c \
              Modules/fpectlmodule.c Modules/md5.c Modules/getpath.c \
              Modules/sha512module.c Modules/_codecsmodule.c Modules/zlibmodule.c \
              Objects/bytes_methods.c Objects/exceptions.c \
              Objects/weakrefobject.c \
              Objects/stringlib/string_format.h \
              Python/codecs.c"
  $ svn revert $BADFILES
  $ untabify.py --simple-replace $BADFILES
    # Revert files using chaotic indentation
  $ svn revert Modules/_cursesmodule.c
    # Done!

You can also rewrite your old patches with the -p option; if there are
still conflicts when applying, use `patch -l`.

"""

import os
import re
import sys
import tempfile
import optparse
import itertools
import contextlib


def untabify(lines, write):
    """Convert tab-indented C source lines to 4-space indentation.

    Input tabs are interpreted as advancing to the next multiple of 8
    columns.  The leading indentation of a line is rescaled so that every
    full 8 input columns become 4 output columns (any remainder is kept
    as-is).  A line indented more than 8 columns deeper than the previous
    code line is treated as a continuation line and keeps its column offset
    relative to that previous line, preserving vertical alignment.  Tabs
    after the first non-blank text are replaced by spaces padding to the
    original 8-column tab stop.  Trailing whitespace is removed and each
    line's own terminator (LF, CRLF, CR or none) is written back unchanged.

    lines -- iterable of strings, one per source line
    write -- callable invoked exactly once per input line with the result
    """
    last_indent = -1
    last_outdent = -1
    last_outline = ""
    for line in lines:
        # The terminator is the trailing run of CR/LF characters; a final
        # line without any terminator yields an empty eol.
        eol = line[len(line.rstrip('\r\n')):]
        line = line.rstrip()
        chunks = line.split("\t")
        output = []
        inpos = 0
        # Measure the leading whitespace in input columns.  After the loop,
        # n is the index of the first chunk holding real text (or the last
        # chunk on a blank line).
        for n, c in enumerate(chunks):
            if not c:
                # Empty chunk: a tab directly at the current position
                inpos = (inpos // 8 + 1) * 8
                continue
            if not c.startswith(' '):
                break
            chunks[n] = c.lstrip(' ')
            inpos += len(c) - len(chunks[n])
            if chunks[n]:
                break
            # Chunk was only spaces: the tab after it jumps to the next stop
            inpos = (inpos // 8 + 1) * 8
        # Continuation line?
        if (last_indent >= 0 and inpos > last_indent + 8
            # labels and end-of-comments can't be continued
            and not last_outline.endswith(':')
            and not last_outline.endswith('*/')):
            # Keep the same column offset from the line being continued
            outpos = inpos + (last_outdent - last_indent)
            indent = last_indent
            outdent = last_outdent
        else:
            outpos = (inpos // 8) * 4 + (inpos % 8)
            indent = inpos
            outdent = outpos
        output.append(' ' * outpos)
        # Process rest of the line, fixing position of internal tabs
        for c in chunks[n:-1]:
            output.append(c)
            outpos += len(c)
            inpos += len(c)
            # Pad for next tab
            inpos = (inpos // 8 + 1) * 8
            output.append((inpos - outpos) * " ")
            outpos = inpos
        output.append(chunks[-1])
        outline = "".join(output).rstrip()
        if outline and not outline.startswith('#'):
            # Non-empty, non-preprocessor line:
            # remember indentation for detection of continuation lines
            last_indent = indent
            last_outdent = outdent
            last_outline = outline
        write(outline + eol)

def replace_tabs(lines, write):
    """Expand tabs to 8-column tab stops without changing any alignment.

    Every tab is replaced by the number of spaces needed to reach the next
    multiple of 8 columns, so text stays in the column it occupied in the
    input.  Trailing whitespace is removed and each line's own terminator
    (LF, CRLF, CR or none) is written back unchanged.

    lines -- iterable of strings, one per source line
    write -- callable invoked exactly once per input line with the result
    """
    for line in lines:
        # The terminator is the trailing run of CR/LF characters; a final
        # line without any terminator yields an empty eol.
        eol = line[len(line.rstrip('\r\n')):]
        line = line.rstrip()
        chunks = line.split("\t")
        output = []
        outpos = 0
        inpos = 0
        for c in chunks[:-1]:
            output.append(c)
            outpos += len(c)
            inpos += len(c)
            # Pad for next tab
            inpos = (inpos // 8 + 1) * 8
            output.append((inpos - outpos) * " ")
            outpos = inpos
        output.append(chunks[-1])
        outline = "".join(output).rstrip()
        write(outline + eol)

def passthrough(lines, write):
    """Identity transform: hand every line to write() unmodified."""
    for entry in lines:
        write(entry)

def untabify_hunk(lines, write, transform_func):
    """Apply transform_func to the text of a run of diff lines.

    The first character of every line (the diff marker) is set aside, the
    remainders are passed to transform_func as one sequence, and each
    transformed line is written with its marker put back in front.
    Nothing is written when lines is empty.
    """
    if not lines:
        return
    split_lines = [(entry[:1], entry[1:]) for entry in lines]
    markers, bodies = zip(*split_lines)
    converted = []
    transform_func(bodies, converted.append)
    for marker, body in zip(markers, converted):
        write(marker + body)

def untabify_patch_fragment(lines, write, transform_func):
    """Transform the hunk lines of one patch fragment.

    A fragment containing no tab at all is copied through untouched.
    Otherwise consecutive lines whose first character is ' ', '+' or '-'
    are grouped and handed to untabify_hunk(); any other line flushes the
    pending group and is itself written verbatim.
    """
    fragment = list(lines)
    if not any('\t' in entry for entry in fragment):
        passthrough(fragment, write)
        return
    pending = []
    for entry in fragment:
        if entry[:1] in ' +-':
            pending.append(entry)
        else:
            # Not a hunk line: flush what was collected, then copy as-is
            untabify_hunk(pending, write, transform_func)
            pending = []
            write(entry)
    untabify_hunk(pending, write, transform_func)

def untabify_patch(lines, write, transform_func):
    """Rewrite a patch so that hunks touching C files are untabified.

    The patch is processed as alternating runs of header lines (copied
    verbatim) and fragments (lines starting with one of ' +-<>!@').  A
    '--- ' / '+++ ' header naming a .c or .h file selects transform_func
    for the fragments that follow; fragments of any other file are copied
    unchanged.  The decision is kept until the next '--- ' file header, so
    it survives interruptions inside a file's hunks such as
    '\\ No newline at end of file'.

    lines          -- iterable of patch lines
    write          -- callable receiving each output line
    transform_func -- line transform with the (lines, write) signature,
                      e.g. untabify or replace_tabs
    """
    patch_chars = ' +-<>!@'
    lines = iter(lines)
    try:
        line = next(lines)
    except StopIteration:
        # Empty patch: nothing to copy or convert
        return
    in_c_file = False
    while True:
        # Copy header lines up to the first line of the next fragment
        for line in itertools.chain([line], lines):
            c = line[:1]
            if line[:4] in ('--- ', '+++ '):
                fields = line.split()
                # Context-diff range markers ("--- 1,4 ----") and bare
                # markers without a path are not file headers.
                if (len(fields) > 1
                    and not line.rstrip().endswith('----')):
                    names_c_file = is_c_file(fields[1])
                    if line[:4] == '--- ':
                        # First header line of a new file: decide afresh
                        in_c_file = names_c_file
                    elif names_c_file:
                        # '+++ ' may name the C file when '--- ' did not
                        # (e.g. '--- /dev/null' for an added file)
                        in_c_file = True
            elif c in patch_chars:
                break
            write(line)
        else:
            # Input exhausted while copying header lines
            break
        fragment_lines = []
        for line in itertools.chain([line], lines):
            c = line[:1]
            if c not in patch_chars:
                break
            fragment_lines.append(line)
        if in_c_file:
            untabify_patch_fragment(fragment_lines, write, transform_func)
        else:
            passthrough(fragment_lines, write)
        if c in patch_chars:
            # The fragment ran to the end of the input
            break


def needs_untabifying(filepath):
    """Return True if the file at filepath appears to use tab indentation.

    Only the first 64 KiB are examined.  The file counts as tab-indented
    when more than 10 lines (other than the very first one) start with a
    tab.
    """
    with open(filepath, "rb") as f:
        head = f.read(65536)
    # Heuristic: we don't want a couple of lone tabs to show as false
    # positives.  The data was read in binary mode, hence bytes literals.
    return head.count(b"\n\t") + head.count(b"\r\t") > 10

def is_c_file(filepath):
    """Tell whether filepath names a C source or header file (.c / .h)."""
    return filepath.endswith(('.h', '.c'))

def walk_c_files(paths):
    """Yield the files to process for the given command-line paths.

    A path that is a regular file is yielded as given, whatever its
    extension.  Every path is also walked recursively, yielding the
    .c/.h files found, sorted by name within each directory.
    """
    for root in paths:
        if os.path.isfile(root):
            yield root
        for dirpath, _subdirs, names in os.walk(root):
            for name in sorted(names):
                if not is_c_file(name):
                    continue
                yield os.path.join(dirpath, name)

@contextlib.contextmanager
def rewrite_file(filepath):
    """Context manager for replacing the contents of filepath.

    Yields a (fin, fout) pair: fin is filepath opened for binary reading,
    fout is a temporary file opened for binary writing.  When the with-body
    completes normally, both are closed and the temporary file is renamed
    over filepath.  If anything fails, the temporary file is removed,
    filepath is left untouched and the exception propagates to the caller.
    """
    # Create the temporary file next to the target so that the final
    # os.rename() never has to cross a filesystem boundary.
    target_dir = os.path.dirname(os.path.abspath(filepath))
    fd, tmpfn = tempfile.mkstemp(dir=target_dir)
    try:
        # Wrap the descriptor first so it gets closed even when opening
        # the input file fails.
        fout = os.fdopen(fd, "wb")
        try:
            fin = open(filepath, "rb")
            try:
                yield fin, fout
            finally:
                fin.close()
        finally:
            fout.close()
        os.rename(tmpfn, filepath)
    except BaseException:
        # Clean up, but never hide the error from the caller
        os.unlink(tmpfn)
        raise

def main():
    """Command-line entry point: parse the options and run one mode.

    Exactly one of -l, -n, -u, -p, -b and --simple-replace must be given:
    -l / -n print the paths of files that do / do not need untabifying,
    -b and --simple-replace rewrite the files that need it in place, and
    -u / -p filter standard input to standard output.
    """
    parser = optparse.OptionParser()
    parser.add_option("-l", "--list", dest="do_list_true",
                      action="store_true", default=False,
                      help="list files needing untabifying")
    parser.add_option("-n", "--list-neg", dest="do_list_false",
                      action="store_true", default=False,
                      help="list files *not* needing untabifying")
    parser.add_option("-u", "--untabify", dest="do_untab",
                      action="store_true", default=False,
                      help="untabify stdin to stdout")
    parser.add_option("-p", "--untabify-patch", dest="do_untab_patch",
                      action="store_true", default=False,
                      help="untabify patch (stdin to stdout)")
    parser.add_option("-b", "--batch", dest="do_batch",
                      action="store_true", default=False,
                      help="untabify specified files and dirs")
    parser.add_option("", "--simple-replace", dest="do_replace",
                      action="store_true", default=False,
                      help="simply replace tabs with 8-spaces (for files with mixed 4-spaces and tabs)")
    options, args = parser.parse_args()
    # The flags are booleans, so their sum is the number of modes selected
    if (options.do_list_true + options.do_list_false +
        options.do_untab + options.do_untab_patch +
        options.do_batch + options.do_replace != 1):
        parser.error("you must specify exactly one of -l, -n, -b, -u, -p "
                     "and --simple-replace")

    if options.do_list_true or options.do_list_false:
        if not args:
            parser.error("-l and -n need a directory path")
        # -l keeps the files that need untabifying, -n keeps the others
        for cfn in walk_c_files(args):
            if needs_untabifying(cfn) == options.do_list_true:
                print(cfn)
        return

    if options.do_batch:
        if not args:
            parser.error("-b needs a file or directory path")
        for cfn in walk_c_files(args):
            if not needs_untabifying(cfn):
                continue
            with rewrite_file(cfn) as (fin, fout):
                print("Untabifing %s" % cfn)
                untabify(fin, fout.write)

    if options.do_replace:
        if not args:
            parser.error("--simple-replace needs a file or directory path")
        for cfn in walk_c_files(args):
            if not needs_untabifying(cfn):
                continue
            with rewrite_file(cfn) as (fin, fout):
                print("Replacing tabs in %s" % cfn)
                replace_tabs(fin, fout.write)

    if options.do_untab:
        untabify(sys.stdin, sys.stdout.write)

    if options.do_untab_patch:
        untabify_patch(sys.stdin, sys.stdout.write, untabify)

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

