#!/usr/bin/python
import os
import sys
import re

# Argument list used when the script is started without arguments: a
# placeholder for the program name, recursive mode, the 'P' flag, the
# directory to check and one folder name to exclude.
DEFAULTS = ['', '-r', '--P', os.getcwd() + '/atarashii', 'tweepy']

# Raw strings keep the regex escapes away from Python's own string escape
# processing; the compiled patterns are unchanged.
# assign / assign2: a '=' directly followed / preceded by a word character.
assign = re.compile(r'=\w')
assign2 = re.compile(r'\w=')

# A minus sign directly followed by digits, i.e. a negative number literal.
number_regex = re.compile(r'(\-[0-9]+)')

def get_next(lines, start):
    '''Return the indentation expected for a blank line placed before
    lines[start].

    Lines whose stripped text starts with '#' are skipped.  The leading
    whitespace width of the first other line is returned; a whitespace-only
    line counts and contributes its own width.

    A line ending in a backslash yields its own indentation at once, unless
    it begins with one of the characters -+=*/.  In that case its
    indentation is only remembered as a fallback, the next non-comment line
    is passed over and the scan goes on from there.

    Returns 0 when there is nothing to scan.
    '''
    expected = 0
    continued = False
    for raw in lines[start:]:
        leading = len(raw) - len(raw.lstrip())
        text = raw.strip()

        if text.endswith('\\'):
            expected = leading
            continued = True
            if text[0] not in '-+=*/':
                return expected

            continue

        if text.startswith('#'):
            continue

        if not continued:
            return leading

        # this line only completes the continuation started above
        continued = False

    return expected


def get_strings(data):
    '''Locate the string literals in the Python source text `data`.

    Returns a list of (start, end, text, is_triple) tuples in source order:

    - plain literals: `start` is the index of the opening quote, `end` the
      index just past the closing quote, `text` the literal including both
      quote characters and `is_triple` is False.
    - triple-quoted literals: `start` is the index of the first opening
      quote character, `end` the index of the LAST closing quote character,
      `text` the contents without the delimiters and `is_triple` is True.

    Text from a '#' outside of any literal up to the end of that line is
    ignored.  Backslash escapes are only honoured inside plain literals.

    A triple-quote delimiter is recognised even when it occupies the final
    three characters of `data`, and an empty triple-quoted literal is
    reported with an empty `text`.
    '''
    strings = []
    size = len(data)
    mode = 0        # 0 = outside, 1 = inside "...", 2 = inside '...'
    bigmode = 0     # 0 = outside, 1 / 2 = inside a double / single triple
    esc = False     # previous character was an unescaped backslash
    comment = False
    start = 0
    chunk = []      # characters of the literal currently being read
    e = 0
    while e < size:
        i = data[e]

        if i == '#' and mode == 0 and bigmode == 0:
            comment = True

        if comment:
            if i != '\n':
                e += 1
                continue

            comment = False

        # "<=" so that a delimiter made of the very last three characters
        # is still seen.
        if mode == 0 and e <= size - 3:
            tri = data[e:e + 3]
            if bigmode == 0 and tri in ('"""', "'''"):
                start = e
                chunk = []
                if tri == '"""':
                    bigmode = 1

                else:
                    bigmode = 2

                # Re-enter the loop instead of consuming the next character
                # blindly: it may already start the closing delimiter.
                e += 3
                continue

            if (bigmode == 1 and tri == '"""') \
               or (bigmode == 2 and tri == "'''"):
                strings.append((start, e + 2, ''.join(chunk), True))
                bigmode = 0
                e += 3
                continue

        if bigmode != 0:
            chunk.append(i)
            e += 1
            continue

        if not esc:
            if mode == 0 and i == '"':
                start = e
                chunk = []
                mode = 1

            elif mode == 1 and i == '"':
                strings.append((start, e + 1, ''.join(chunk) + '"', False))
                mode = 0

            elif mode == 0 and i == '\'':
                start = e
                chunk = []
                mode = 2

            elif mode == 2 and i == '\'':
                strings.append((start, e + 1, ''.join(chunk) + '\'', False))
                mode = 0

        # Only text inside a literal is of interest; this also records the
        # opening quote right after the literal has been entered.
        if mode != 0:
            chunk.append(i)

        esc = i == '\\' and not esc
        e += 1

    return strings



# Substrings that give away a double-quoted (respectively single-quoted)
# string literal once the string contents have been blanked out.
_DOUBLE_QUOTE_MARKS = ('""', '("', '")', '",', ', "', '"\n', '["', '"]',
                       '"""')
_SINGLE_QUOTE_MARKS = ('\'\'', '(\'', '\')', '\',', ', \'', '\'\n', '[\'',
                       '\']', '\'\'\'')

# Replacements applied, in this order, before the operator spacing check so
# that augmented assignments, '//', '**' and argument unpacking are not
# mistaken for badly spaced binary operators.
_OPERATOR_MASKS = (
    ('+=', '  '), ('-=', '  '), ('-(', '  '), ('*=', '  '), ('/=', '  '),
    ('//', '  '), ('**', '  '), (', *', '    '), ('(*', ''),
    ('lambda *', ''),
)


def _read_text(filename, size=-1):
    '''Return up to `size` characters of `filename` (everything when `size`
    is negative) and close the file before returning.'''
    handle = open(filename)
    try:
        return handle.read(size)

    finally:
        handle.close()


def _contains_any(text, needles):
    '''Return True if at least one of `needles` occurs in `text`.'''
    return any(needle in text for needle in needles)


def check(path, folders, excludes, flags, checkstyle):
    '''Style-check the Python files located directly inside `path`.

    Sub-directories are not entered.  Those whose name is not listed in
    `excludes` are appended to `folders` as path + '/' + name.  Files ending
    in '~' and files with an extension other than '.py' are skipped; a file
    without any extension is only checked when its first 20 characters
    start with '#!' and mention 'python'.

    `flags` is a string of option letters:
        C  skip the empty string constant check
        O  skip the old style class check
        D  skip the double quote check
        S  enable the single quote check
        I  blank lines must be empty instead of carrying the indentation
           of the following code
        B  skip the '== True' / '== False' checks
        P  enable the print statement check

    When `checkstyle` is true, a file named 'checkstyle' is exempt from the
    empty string constant check.

    For every file with findings '- path/name' and the findings are
    printed.  Returns the number of such files.
    '''
    state = 0
    files = sorted(os.listdir(path))
    for foo in files:
        filename = path + '/' + foo
        if os.path.isdir(filename):
            if not foo in excludes:
                folders.append(filename)

            continue

        if foo.endswith('~'):
            continue

        if not foo.endswith('.py'):
            if '.' in foo:
                continue

            # no extension at all: only take scripts with a python shebang
            head = _read_text(filename, 20)
            if head[0:2] != '#!' or 'python' not in head:
                continue

        checkstyle_con = checkstyle and foo == 'checkstyle'
        errors = []
        in_def = False
        mdef = ''

        # get rid of string contents: everything between the outermost
        # quote characters of each literal becomes '&', line breaks are
        # kept so that line numbers and offsets stay valid
        data = _read_text(filename)
        for es in get_strings(data):
            start, end = es[0], es[1]
            masked = ''.join(['\n' if ch == '\n' else '&'
                              for ch in data[start + 1:end - 1]])

            data = data[:start + 1] + masked + data[end - 1:]

        lines = data.split('\n')
        string = 0
        last_empty = True
        last_comment = False
        last_def = False
        last_class = False
        last_decorator = False
        comment_line_count = 0
        real_empty_count = 0
        empty_count = 0

        for l, d in enumerate(lines):
            lineno = l + 1
            dd = d.strip()

            # Triple quotes that are still visible after masking.  A
            # delimiter without a partner on the same line leaves `string`
            # non-zero, which turns the indentation check off from there on.
            f = dd
            if string == 0:
                if f.find('\'\'\'') != -1:
                    f = f[3:]
                    string = 1

                if f.find('\'\'\'') != -1 and string == 1:
                    string = 0

            if string == 0:
                if f.find('"""') != -1:
                    f = f[3:]
                    string = 2

                if f.find('"""') != -1 and string == 2:
                    string = 0

            if string == 0:
                indent = len(d) - len(d.lstrip())
                if not 'I' in flags:
                    token_indent = get_next(lines, l + 1)
                    if indent != token_indent and dd == '':
                        errors.append('%d: Invalid indentation of %d instead of %d\n' % (lineno, indent, token_indent))

                elif dd == '' and d != '':
                    errors.append('%d: Invalid indentation of %d instead of 0\n' % (lineno, indent))

            e = len(d.rstrip())
            if e != 0 and e != len(d.rstrip('\n')):
                errors.append('%d: Whitespace at end of line\n' % lineno)

            if not 'D' in flags and _contains_any(d, _DOUBLE_QUOTE_MARKS):
                errors.append('%d: Use of double quotes\n' % lineno)

            if 'S' in flags and _contains_any(d, _SINGLE_QUOTE_MARKS):
                errors.append('%d: Use of single quotes\n' % lineno)

            if not 'O' in flags:
                if d.startswith('class') and d.find('(') == -1:
                    errors.append('%d: Use of old style class\n' % lineno)

            if '!= None' in d or '== None' in d:
                errors.append('%d: Testing for "None" without "is" operator\n' % lineno)

            if '.has_key' in d:
                errors.append('%d: Use of "has_key()" instead of "in"\n' % lineno)

            if 'in range' in d:
                errors.append('%d: Use of "range()" instead of "xrange()"\n' % lineno)

            if 'in range(0' in d or 'in xrange(0' in d:
                errors.append('%d: Unneded 0 index in "(x)range()" call\n' % lineno)

            if _contains_any(d, ('in range(0, len(', 'in range(0,len(',
                                 'in range(len(', 'in xrange(0, len(',
                                 'in xrange(0,len(')):
                errors.append('%d: Use of "for ... in (x)range" instead of "enumerate"\n' % lineno)

            if '== True' in d and not 'B' in flags:
                errors.append('%d: Testing for "True" without "is" operator\n' % lineno)

            if '== False' in d and not 'B' in flags:
                errors.append('%d: Testing for "False" without "is" operator\n' % lineno)

            if dd.startswith('print ') and 'P' in flags:
                errors.append('%d: print statement\n' % lineno)

            if not 'C' in flags and not checkstyle_con:
                # an empty literal that is not the start of a triple quote
                wp = d.find('= ""')
                wp2 = d.find('= \'\'')
                if wp != -1 and d.find('= """') != wp:
                    var = d[:wp].rstrip()

                elif wp2 != -1 and d.find('= \'\'\'') != wp2:
                    var = d[:wp2].rstrip()

                else:
                    var = None

                if var is not None:
                    # assignment target: the text after the last space or,
                    # if there is no space, after the last dot
                    pos = var.rfind(' ')
                    if pos == -1:
                        pos = var.rfind('.')

                    name = var[pos + 1:]

                    # names without any lower case character are constants
                    if any(o != o.upper() for o in name):
                        errors.append('%d: Assignment of emtpy string instead of constant\n' % lineno)

            # collect a (possibly multi line) "def" header up to its colon
            if dd.startswith('def '):
                in_def = True
                mdef = ''

            if in_def:
                mdef += dd

            if in_def and dd.endswith(':'):
                in_def = False
                mdef = mdef.strip('\n').strip()
                if mdef.find(' = ') != -1:
                    errors.append('%d: Spaces around "=" in method defintion\n' % lineno)

            if dd == 'return':
                errors.append('%d: Empty return statement\n' % lineno)

            if dd.startswith('def ') and not last_empty and not last_class and not last_decorator:
                errors.append('%d: No newline infront of function/method definition\n' % lineno)

            if dd.startswith('class '):
                if empty_count < 2:
                    errors.append('%d: Only %d newlines before class definition instead of 2\n' % (lineno, empty_count))

                elif empty_count > 2:
                    errors.append('%d: %d newlines infront of class definition instead of 2\n' % (lineno, empty_count))

            if dd.startswith('else:') or dd.startswith('elif '):
                if not last_empty:
                    errors.append('%d: No newline infront of else/elif statement\n' % lineno)

            if dd.startswith('#') and not last_comment and not last_empty and not last_def:
                errors.append('%d: No newline infront of comment\n' % lineno)

            last_def = dd.startswith('def ') or dd.startswith('try:') \
                       or dd.startswith('except ') or dd.startswith('except:')

            last_comment = dd.startswith('#')
            last_class = dd.startswith('class')
            last_decorator = dd.startswith('@')

            # Comment lines
            if last_comment and dd.find('----') == -1:
                real_empty_count = 0

            if last_comment and dd.find('----') != -1:
                comment_line_count += 1

            else:
                # The run of '----' comment lines ended on an earlier line,
                # hence the line numbers below point backwards.
                if comment_line_count == 1:
                    if real_empty_count < 2:
                        errors.append('%d: Only %d newline infront of full line comment\n' % (l, real_empty_count))

                    elif real_empty_count > 2:
                        errors.append('%d: %d newlines instead of 2 infront of full line comment\n' % (l, real_empty_count))

                elif comment_line_count == 2:
                    if real_empty_count < 2:
                        errors.append('%d: Only %d newlines infront of double line comment instead of 2\n' % (l - 1, real_empty_count))

                    elif real_empty_count > 2:
                        errors.append('%d: %d newlines instead of 2 infront of double line comment\n' % (l - 1, real_empty_count))

                comment_line_count = 0

            if dd == '':
                empty_count += 1
                real_empty_count += 1

            elif not last_comment:
                empty_count = 0
                real_empty_count = 0

            if not last_comment:
                last_empty = dd == '' or dd.endswith('\\')

                ld = d
                for token, blank in _OPERATOR_MASKS:
                    ld = ld.replace(token, blank)

                # negative number literals are not subtractions
                ld = number_regex.sub('', ld)

                # Drop a trailing comment so that only code is inspected.
                cpos = ld.rfind('#')
                if cpos != -1:
                    if cpos > ld.rfind('\'') and cpos > ld.rfind('"'):
                        ld = ld[:cpos]

                for op in '+-*/':
                    if ld.count(op) != ld.count(' %s ' % op):
                        errors.append('%d: Missing spaces around "%s" operator\n' % (lineno, op))

        # Number of empty entries at the end of `lines`, minus one.
        count = -1
        for tail in lines[::-1]:
            if tail != '' and tail != '\n':
                break

            count += 1

        if count == 0:
            errors.append('xx: File does not end with newline\n')

        elif count > 1:
            errors.append('xx: File ends with more than one newline\n')

        if errors:
            # A parenthesised single argument prints the same on Python 2
            # and is also valid Python 3.
            print('- %s/%s' % (path, foo))
            print(''.join(errors))
            state += 1

    return state


if __name__ == '__main__':
    # Usage: [-r] [--FLAGS] PATH [EXCLUDED_FOLDER ...]
    #
    #   -r = recursive
    #
    #   -- options (all letters in one argument, e.g. --CP; when several
    #   "--" arguments are given the last one wins):
    #   C = don't check for constants
    #   O = don't check for old style classes
    #   D = don't check for double quotes
    #   S = check for single quotes
    #   I = indentation should be stripped
    #   B = don't check for "== True" / "== False"
    #   P = check for print statements
    #
    # The exit status is the number of files with findings (capped at 255),
    # or 1 when the arguments are unusable.

    # Work on a copy (minus the program name) so that neither DEFAULTS nor
    # sys.argv gets modified.
    if len(sys.argv) < 2:
        argv = list(DEFAULTS)[1:]

    else:
        argv = list(sys.argv)[1:]

    args = []
    rec = False
    flags = ''
    for arg in argv:
        if arg == '-r':
            rec = True

        elif arg.startswith('--'):
            flags = arg[2:]

        else:
            args.append(arg)

    if not args:
        print('No directory given')
        sys.exit(1)

    path = args[0]
    if not os.path.exists(path):
        print('Directory not found: %s' % path)
        sys.exit(1)

    excludes = args[1:]
    folders = [path]
    state = 0
    if rec:
        # check() appends the sub-folders it finds, so the list grows
        # while it is being walked
        p = 0
        while p < len(folders):
            state += check(folders[p], folders, excludes, flags, folders[p] == path)
            p += 1

    else:
        state += check(path, folders, excludes, flags, True)

    # Exit statuses are truncated to 8 bits on POSIX systems; without the
    # cap 256 failing files would be reported as success.
    sys.exit(min(state, 255))

