"""Routines for interacting with SourceForge interfaces"""

import cgi
import re
import urllib
import urlparse

def urldecode(query):
    """Decode a URL query string into a flat ``{name: value}`` dict.

    ``parse_qs`` maps every name to a *list* of values; this helper insists
    that each name occurs exactly once and unwraps the single value.  With
    ``parse_qs``'s default settings, parameters whose value is blank are
    omitted from the result.

    Raises:
        ValueError: if any parameter name appears more than once in *query*.
    """
    # cgi.parse_qs has been deprecated since Python 2.6 in favour of the
    # urlparse module's version, which Python 3 exposes as urllib.parse.
    # Importing here keeps the function usable on either interpreter line.
    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs  # Python 2.6+

    decoded = {}
    # Build a fresh dict instead of rewriting the one being iterated over.
    for name, values in parse_qs(query).items():
        if len(values) != 1:
            raise ValueError("unexpected duplicate entry")
        decoded[name] = values[0]
    return decoded

class SummaryParser:
    """Collect query-string links from fetched pages, following paging links.

    Each page is scanned line by line for ``HREF="...?query"`` attributes.
    Every query string is decoded with ``urldecode``; links whose ``func``
    parameter is one of the requested *funcs* are recorded as
    ``(href, params, line)`` tuples.  A ``func=browse`` link carrying an
    ``offset`` greater than the offset of the page being parsed is remembered
    as the next page and is loaded automatically once the current page has
    been fully scanned.

    Note that pages are chained through ``parse -> load_next -> load ->
    parse``, so the call stack grows with every page followed.
    """

    # Matches HREF="<non-space chars>?<query>" and captures the part between
    # the quotes.  Only the upper-case spelling ``HREF`` is recognised, and
    # the query part is limited to letters, digits, '=', '&' and '_'.
    rx_href = re.compile(r'HREF="(\S*\?[A-Za-z0-9=&_]+)"')

    # Class-wide default verbosity; a truthy ``verbose`` argument to
    # __init__ overrides it for that instance only.
    VERBOSE = 0

    def __init__(self, root_url, funcs, verbose=None):
        """Set up a parser.

        root_url -- base URL against which every loaded URL is resolved
        funcs    -- iterable of ``func`` parameter values worth keeping
        verbose  -- if truthy, overrides the class-level ``VERBOSE`` flag
        """
        if verbose:
            self.VERBOSE = verbose
        self.root_url = root_url
        # Offset of the page currently being parsed; paging links must
        # exceed it to be followed.
        self.offset = 0
        # Accumulated (href, params, line) tuples across all pages.
        self.hrefs = []
        # Used as a set: the keys are the func names to keep.
        self.funcs = dict.fromkeys(funcs, 1)
        # Pending (href, params) of the next page to load, or None.
        self.next = None

    def get_hrefs(self):
        """Return the list of ``(href, params, line)`` tuples collected so far."""
        return self.hrefs

    def load(self, _url, offset=None):
        """Fetch *_url* (resolved against ``root_url``) and parse the response.

        If *offset* is given it becomes the current page offset before the
        page is parsed.  Relies on the module-level ``urllib`` and
        ``urlparse`` imports.
        """
        url = urlparse.urljoin(self.root_url, _url)
        if self.VERBOSE:
            print("loading %s" % url)
        if offset is not None:
            self.offset = offset
        handle = urllib.urlopen(url)
        try:
            resp = handle.read()
        finally:
            # Release the connection even when reading fails.
            handle.close()
        self.parse(resp)

    def parse(self, buf):
        """Scan *buf* for links, then load the next page if one was found."""
        for line in buf.split("\n"):
            # finditer resumes after the closing quote of each match; that
            # quote cannot begin another 'HREF="', so this finds the same
            # matches as re-searching from the end of the captured group.
            for mo in self.rx_href.finditer(line):
                self.handle_href_match(mo, line)
        if self.VERBOSE:
            print("found %d hrefs" % len(self.hrefs))
        if self.next:
            self.load_next()

    def handle_href_match(self, mo, line):
        """Decode the href captured by match object *mo* and dispatch it."""
        query = mo.group(1)
        params = self.parse_query(query)
        self.handle_query(query, params, line)

    def handle_query(self, query, dict, line):
        """Record *query* if wanted and note it if it points to a later page.

        query -- the href text as captured from the page
        dict  -- its decoded parameters (the name shadows the builtin but is
                 kept so existing keyword callers and overrides still work)
        line  -- the source line the href was found on
        """
        if self.VERBOSE:
            print(query)
        if 'func' not in dict:
            return
        func = dict['func']
        if func == 'browse' and 'offset' in dict:
            # Only links that move past the current page are followed; when
            # a page has several, the last one seen wins.
            if int(dict['offset']) > self.offset:
                self.next = query, dict
        if self.keep_func(func):
            self.hrefs.append((query, dict, line))

    def keep_func(self, func):
        """Return true if links with this ``func`` value should be recorded."""
        return func in self.funcs

    def parse_query(self, href):
        """Return the decoded parameters of the query string in *href*."""
        i = href.find("?")
        return urldecode(href[i+1:])

    def load_next(self):
        """Load the pending next page and clear the pending marker."""
        assert self.next is not None
        query, params = self.next
        # Clear before loading so the new page can set its own successor.
        self.next = None
        self.load(query, int(params['offset']))
