import sys, os, urllib, sha
#os.environ['DJANGO_SETTTINGS_MODULE'] = 'pycon.settings'
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__) + '../../'))
import final
import init07
from elementtree import HTMLTreeBuilder
from django.contrib.auth.models import User, Group
from pycon.usermgr.models import *
from pycon.propmgr.models import *
from django.db import transaction

# Base URL of the 2006 talk page; wget_talk() appends a talk id to it.
ZOPE_BASE_URL = r'http://us.pycon.org/zope/talks/talkLocate?year=2006&id='
# Encoding applied to text values before they are stored on model objects.
ENCODING = 'utf8'
# Username of the placeholder author.  init_globals() rebinds this name to
# the matching django User object.
DefaultUser = 'UnknownAuthor' ## gets replaced with a django user object
# Name of the submitters group.  init_globals() rebinds this name to the
# matching django Group object, creating the group if it does not exist.
DefaultGroup = 'Submitters'
# Password given to every account created by import_user().
passwd = 'pycon'
# Maps (first, last) name tuples to the user import_user() returned for them.
userCache = {}

def wget_talk(dir, id):
    """Download the talk page for *id* into ``<dir>/<id>.html``.

    The source URL is ZOPE_BASE_URL with the id appended.  The URL and the
    target file name are printed before the transfer starts, and every
    progress callback made by urllib.urlretrieve is echoed as a tuple.
    """
    def show_progress(*progress):
        # urlretrieve passes its progress figures positionally; print them
        # as the tuple they arrive in.
        print(progress)

    source = ZOPE_BASE_URL + id
    target = dir + os.sep + id + '.html'
    print(" ".join([source, "->", target]))
    urllib.urlretrieve(source, target, show_progress)
    
def download_html(download_dir='.'):
    """Fetch the page of every schedule entry that refers to a talk id.

    Walks all days in final.schedule; an entry whose talk field starts with
    '#' carries a talk id after the '#', which is handed to wget_talk().
    Entries without the '#' prefix are skipped.
    """
    for slots in final.schedule.itervalues():
        for _room, _time, _duration, title in slots:
            if not title.startswith('#'):
                continue
            talk_id = title[1:]
            wget_talk(download_dir, talk_id)


class UserData(object):
    """Name parts of one talk author, parsed from an author element.

    The element text is read as
    ``[honorific] first [middle] last [(username)] [suffix] [/ affiliation]``.
    HONORIFIC and SUFFIX are the collections of recognised honorifics and
    suffixes; they are not defined in this module's own code and are
    presumably supplied by one of its star imports.

    Attributes (all None unless found in the text):
        affiliation, first, last, honorific, middle, suffix, username
        init -- True when the instance was built from an element.
    """
    def __init__(self, elem=None):
        """Parse *elem*.text; with ``elem=None`` every field stays None."""
        self.affiliation = None
        self.first = None
        self.last = None
        self.honorific = None
        self.middle = None
        self.suffix = None
        self.username = None
        self.init = False
        if elem is None:
            return
        self.init = True
        # An element without text has .text set to None rather than ''.
        authdata = (elem.text or '').strip()
        if '/' in authdata:
            # Everything after the first '/' is the affiliation.
            authdata, aff = authdata.split('/', 1)
            self.affiliation = aff.strip()
            authdata = authdata.strip()
        namedata = authdata.split()
        if not namedata:
            # No name at all: leave first/last as None so that import_user()
            # falls back to the default user.
            return
        if len(namedata) == 1:
            self.first = namedata[0]
            self.username = namedata[0].replace('.', '_')
            return
        if namedata[0] in HONORIFIC:
            self.honorific = namedata.pop(0)
        # Keep at least one token so there is always a first name to pop.
        if len(namedata) > 1 and namedata[-1] in SUFFIX:
            self.suffix = namedata.pop()
        if len(namedata) >= 3:
            # The first token that looks like an initial or a nickname
            # (contains '.', '(' or '"') is taken as the middle name.
            mids = [i for i, x in enumerate(namedata)
                    if '.' in x or '(' in x or '"' in x]
            if mids:
                self.middle = namedata.pop(mids[0])
        self.first = namedata.pop(0)
        if namedata:
            tail = namedata[-1]
            if tail.startswith('(') and tail.endswith(')'):
                # A trailing "(name)" is an explicit username.
                self.username = namedata.pop()[1:-1].replace(
                        ' ', '_').replace('.', '_')
        self.last = ' '.join(namedata)
        if not self.username:
            # Default username: first and last name with spaces and dots
            # turned into underscores.
            self.username = (self.first + ' ' + self.last).replace(
                    ' ', '_').replace('.', '_')

    def __repr__(self):
        """Return the known name parts followed by the username in parens."""
        namedata = [self.honorific, self.first, self.middle, self.last,
                    self.suffix]
        fullname = ' '.join([x for x in namedata if x])
        # %s formatting keeps this usable when username is still None.
        return '%s (%s)' % (fullname, self.username)
    
class TalkData(object):
    """One scheduled talk, optionally filled in from a saved HTML page.

    A title starting with '#' is treated as a talk id: the file
    ``<htmldir>/<id>.html`` is parsed, and title, authors and summary are
    taken from the elements whose ``class`` attribute is listed in
    ``elemclasses``.  Any other title is stored as given, with no authors
    and an empty summary.
    """
    # Values of the 'class' attribute that have a parse_<class> method.
    elemclasses = [ 'talktitle', 'talkauthor', 'talkabstract', 'talkwebsite' ]
    # Tags written as '<tag />' when empty, because HTML gives them no
    # closing tag.
    voidtags = ('br', 'hr', 'img', 'input', 'link', 'meta')

    class SiteError(Exception):
        """The saved page is a 'Site Error' page rather than a talk page."""
        pass

    def __init__(self, title, dur=0, htmldir='.'):
        """Store *dur* and load the talk named or referenced by *title*.

        Raises SiteError (via init_from_file) when the saved page for a
        '#id' title is an error page.
        """
        self.duration = dur
        self.summary = ''
        self.authors = []
        if title.startswith('#'):
            # Dispatch table from element class to its handler method.
            self.switch = dict((cl, getattr(self, 'parse_' + cl))
                               for cl in self.elemclasses)
            self.switch[None] = lambda a: None
            self.init_from_file(htmldir + os.sep + title[1:] + '.html')
        else:
            self.title = title

    def init_from_file(self, htmlfile):
        """Parse *htmlfile* and feed every element of the talk table to parse().

        Raises SiteError when the page's <h2> reads 'Site Error'.  Any
        failure is reported with the file name and then re-raised.
        """
        try:
            tree = HTMLTreeBuilder.parse(htmlfile)
            body = tree.find('body')
            errorh2 = body.find('h2')
            if errorh2 is not None and errorh2.text == 'Site Error':
                raise self.SiteError('bad talk id')
            # The talk lives in the first cell of the third row of the
            # page's outer table.
            talktd = body.find('table').findall('tr')[2].find('td')
            assert talktd.attrib.get('id', None) == 'main', "Failed to find main part of page for: %s" % htmlfile
            details = [div for div in talktd.find('span').findall('div')
                       if div.attrib.get('id', None) == 'TalkDetails'][0]
            details = [div for div in details.findall('div')
                       if div.attrib.get('id', None) == 'details'][0]
            talktable = details.find('table')
            for elem in talktable.getiterator():
                self.parse(elem)
        except:
            # Deliberately broad: name the offending file, then let the
            # original exception continue to the caller.
            print("Failed while parsing: %s" % (htmlfile,))
            raise

    def parse(self, elem):
        """Hand *elem* to its handler if its class is in elemclasses."""
        cls = elem.get('class')
        if cls not in self.elemclasses:
            return
        self.switch[cls](elem)

    def parse_talktitle(self, elem):
        """Store the stripped element text as the talk title."""
        self.title = (elem.text or '').strip()

    def parse_talkauthor(self, elem):
        """Append a UserData built from the author element."""
        self.authors.append(UserData(elem))

    def parse_talkabstract(self, elem):
        """Store the markup inside the abstract's first <p> as the summary.

        Child elements are written back out as tags with double-quoted
        attributes, and the text following each child is kept.  Text is
        emitted as the parser delivered it.  An abstract without a <p>
        leaves the summary unchanged.
        """
        def quote(value):
            # Make the value safe inside a double-quoted attribute.
            return value.replace('&', '&amp;').replace('"', '&quot;')

        def addinner(node):
            # Text before the first child, then each child followed by the
            # text that trails it; missing text is None, not ''.
            parts = [node.text or '']
            for sub in node:
                parts.append(addchild(sub))
                parts.append(sub.tail or '')
            return ''.join(parts)

        def addchild(child):
            attrs = ''.join(' %s="%s"' % (key, quote(val))
                            for key, val in child.items())
            inner = addinner(child)
            if not inner and child.tag in self.voidtags:
                return '<%s%s />' % (child.tag, attrs)
            return '<%s%s>%s</%s>' % (child.tag, attrs, inner, child.tag)

        para = elem.find('p')
        if para is None:
            return
        self.summary = addinner(para)

    def parse_talkwebsite(self, elem):
        """Website elements are recognised but not stored."""
        pass

    def __str__(self):
        return self.title

def genTalk(talk, duration, htmldir='.'):
    """Build a TalkData for *talk*, or return None for a bad talk id.

    Only TalkData.SiteError is absorbed; any other exception raised while
    building the TalkData propagates to the caller.
    """
    try:
        data = TalkData(talk, duration, htmldir)
    except TalkData.SiteError:
        data = None
    return data

def load_talk_data(downloadhtml=False, html_dir='.'):
    """Load every talk in final.schedule and collect the distinct authors.

    downloadhtml -- when true, download the talk pages into html_dir first.
    html_dir     -- directory holding the saved '<id>.html' pages.

    Returns ``(talks, authors)``: the list of talks that genTalk() could
    build, and one author object per distinct (first, last) name pair (the
    first one encountered wins).  Asserts that no talk lists the same
    (first, last) pair twice, and prints a notice when some author appears
    with more than one affiliation.
    """
    if downloadhtml:
        download_html(html_dir)

    talks = []
    for day in final.schedule.itervalues():
        for room, time, duration, talk in day:
            loaded = genTalk(talk, duration, html_dir)
            if loaded:
                talks.append(loaded)

    authors = set()     # distinct (first, last)
    authwaff = set()    # distinct (first, last, affiliation)
    authmap = {}        # (first, last) -> first author object seen
    for talk in talks:
        names = set()
        for person in talk.authors:
            names.add((person.first, person.last))
            authwaff.add((person.first, person.last, person.affiliation))
        authors.update(names)
        assert len(names) == len(talk.authors), "Same author listed more than once in talk: %s" % talk.title
        for person in talk.authors:
            authmap.setdefault((person.first, person.last), person)

    if len(authors) != len(authwaff):
        print("Some authors have more than one affilliation")
        print("Only the first found will be used")
        print("%d authors, %d auth w/aff" % (len(authors), len(authwaff)))
    return talks, authmap.values()

@transaction.commit_on_success
def import_user(auth):
    """Return the user for *auth*, creating the account on first sight.

    Results are memoised in userCache under the (first, last) name pair.
    An author whose first and last names are both None, or both empty,
    maps to DefaultUser.  Otherwise the User with the author's username is
    looked up; when it does not exist it is created with the shared
    password and a placeholder e-mail address, together with a UserProfile
    carrying the affiliation, honorific, middle name and suffix that are
    set on *auth*.
    """
    first = auth.first
    last = auth.last
    key = (first, last)

    known = userCache.get(key, None)
    if known is not None:
        return known

    both_missing = first is None and last is None
    both_empty = first == '' and last == ''
    if both_missing or both_empty:
        userCache[key] = DefaultUser
        return DefaultUser

    username = auth.username.encode(ENCODING)
    try:
        user = User.objects.get(username__exact=username)
    except User.DoesNotExist:
        print(" ".join(["Adding User:", username]))
        user = User.objects.create_user(username, 'foo@bar.com', passwd)
        user.first_name = first.encode(ENCODING)
        if last:
            user.last_name = last.encode(ENCODING)
        user.save()

        prof = UserProfile()
        prof.user = user
        # Copy over only the optional name parts that are actually set.
        for field in ('affiliation', 'honorific', 'middle', 'suffix'):
            value = getattr(auth, field)
            if value:
                setattr(prof, field, value.encode(ENCODING))
        prof.save()

    userCache[key] = user
    return user

@transaction.commit_on_success
def import_talk(talk):
    """Create an accepted, published Proposal for *talk* unless one exists.

    Nothing is done when a Proposal with the same title is already stored,
    or when the talk has no authors (a notice is printed in that case).
    The first author becomes the submitter and the remaining authors are
    added as co-authors.  Up to three members of the 'Reviewers' group,
    chosen in random order and excluding the submitter, are assigned as
    reviewers.
    """
    try:
        Proposal.objects.get(title__exact=talk.title)
    except Proposal.DoesNotExist:
        already_stored = False
    else:
        already_stored = True
    if already_stored:
        return

    if len(talk.authors) == 0:
        print(" ".join(["Skipping UnAuthored Talk:", talk.title]))
        return
    print(" ".join(["Adding Talk:", talk.title]))

    prop = Proposal()
    prop.title = talk.title.encode(ENCODING)
    prop.duration = talk.duration
    prop.summary = talk.summary.encode(ENCODING)
    prop.status = 'A'
    prop.published = True

    # The author list is known to be non-empty at this point.
    for position, auth in enumerate(talk.authors):
        author = import_user(auth)
        if position == 0:
            # Save as soon as the submitter is known, before any
            # co-author is linked to the proposal.
            prop.submitter = author
            prop.save()
        else:
            author.proposals_coauthored_set.add(prop)

    reviewer_pool = Group.objects.get(name='Reviewers').user_set
    candidates = reviewer_pool.exclude(pk=prop.submitter.id)
    prop.reviewers = candidates.order_by('?')[:3]

    prop.save()

def init_globals():
    """Replace the DefaultGroup and DefaultUser names with database objects.

    Both globals start out holding a name.  The group of that name is
    fetched, or created and saved when missing.  The user of that username
    is fetched, or created through import_user() as 'Author Unknown' when
    missing.
    """
    global DefaultGroup, DefaultUser

    group_name = DefaultGroup
    try:
        group = Group.objects.get(name__exact=group_name)
    except Group.DoesNotExist:
        print(" ".join(["Adding Group:", group_name]))
        group = Group()
        group.name = group_name
        group.save()
    DefaultGroup = group

    user_name = DefaultUser
    try:
        found = User.objects.get(username__exact=user_name)
    except User.DoesNotExist:
        placeholder = UserData()
        placeholder.first = 'Author'
        placeholder.last = 'Unknown'
        placeholder.username = user_name
        found = import_user(placeholder)
    DefaultUser = found
    # Hashing of the shared password is disabled:
    #passwd = 'sha::' + sha.sha(passwd).hexdigest()

def import_all(htmldir):
    """Run the complete import using *htmldir* as the page cache.

    Calls init07.main() first.  When *htmldir* does not exist yet it is
    created and the talk pages are downloaded into it; an existing
    directory is used as it is.  All authors are imported before the talks.
    """
    init07.main()

    need_download = not os.path.exists(htmldir)
    if need_download:
        os.mkdir(htmldir)

    talks, authors = load_talk_data(need_download, htmldir)
    for person in authors:
        import_user(person)
    for entry in talks:
        import_talk(entry)
        

# Runs whenever this module is imported or executed: rebinds DefaultGroup
# and DefaultUser to database objects before anything else uses them.
init_globals()

if __name__ == '__main__':
    # Talk pages are kept in a 'talks' directory next to this script.
    import_all(os.path.abspath(os.path.dirname(sys.argv[0])) +os.sep+ 'talks')
