# timings of the new sets module

from timeit import Timer

setup = """
from random import randrange, seed
from sets import Set, ImmutableSet
seed(845957276943582345234523452734241L)
n = 1000
a = [randrange(n) for i in xrange(n)]
b = [randrange(n) for i in xrange(n)]
seta = set(a)
Seta = Set(a)
setb = set(b)
dicta = dict.fromkeys(a, True)
dictb = dict.fromkeys(b, True)
NONE = None
TRUE = True
listseta = list(seta)
listsetb = list(setb)
dac = dicta.__contains__
dah = dicta.has_key
sac = seta.__contains__
sadd = seta.add
setc = set(seta)
for _ in setc: pass
seta.discard(None)
setb.discard(None)
"""

"""
If you don't already have a set,
   then passing the list form is always better or as good a making a temp set

If you do have a set or dict, then passing it is better or as good (w/in 20)% as a listset()

The w/20 is because looping over sets is slower than looping over a list.

Need more study on difference (first result) and sym_diff_up (second result)

--------------

set(a).binop(b)  vs set(a).binop(set(b))
   when =, it means internally b is first converted to a set if not already
   when <, it means the binop uses an algo that doesn't make a set first:
       diff, diff_up, inter, inter_up, union, union_up

set(a).binop(listsetb) vs set(a).binop(setb)
   when >>, it means that the list had to be coerced to a set first
       diff, sym_diff_up
   when >, it means that faster merge was used instead of iter
       union, union_up,

   when < 1/6, just means looping over dicts is slower than lists which is ok
       inter, inter_up, diff_up
   or something else
       sym_diff has ?flawed strategy of makingset and passing to sym_up
            B:makeset, A:sparseloop  A:contains  A-m:adds m:deletes
        vs. B:makeset, A:sparseloop  A:contains A-m:adds
                       B:sparseloop  B:contains B-m:adds
            delete B

"""

# set_diff use of iterable vs set
stmts = [
    'list(seta)',
    'list(dicta)',
    'dicta.keys()',
    'list(listseta)',
    'list(setb)',
    'setc.__contains__(None); list(setc)',
    'setc.discard(None); list(setc)',
    'frozenset(setb)',
    'frozenset(listsetb)',
    "'< means binop bypassed forcing to set'",
    "seta.difference(b)",
    "seta.difference(set(b))",
    "Seta.difference(b)",
    "seta.union(b)",
    "seta.union(set(b))",
    "Seta.union(b)",
    "seta.intersection(b)",
    "seta.intersection(set(b))",
    "Seta.intersection(b)",
    "seta.symmetric_difference(b)",
    "seta.symmetric_difference(set(b))",
    "Seta.symmetric_difference(b)",

    "seta.difference_update(b)",
    "seta.difference_update(set(b))",
    "Seta.difference_update(b)",
    "seta.update(b)",
    "seta.update(set(b))",
    "Seta.update(b)",
    "seta.intersection_update(b)",
    "seta.intersection_update(set(b))",
    "Seta.intersection_update(b)",
    "seta.symmetric_difference_update(b)",
    "seta.symmetric_difference_update(set(b))",
    "Seta.symmetric_difference_update(b)",

    "'>> due to set coercion; > merge faster than iter; < 1/6 dict looping is slow'",
    "seta.difference(listsetb)",
    "seta.difference(setb)",
    "seta.difference(dictb)",
    "seta.union(listsetb)",
    "seta.union(setb)",
    "seta.union(dictb)",
    "seta.intersection(listsetb)",
    "seta.intersection(setb)",
    "seta.intersection(dictb)",
    "seta.symmetric_difference(listsetb)",
    "seta.symmetric_difference(setb)",
    "seta.symmetric_difference(dictb)",

    "seta.difference_update(listsetb)",
    "seta.difference_update(setb)",
    "seta.difference_update(dictb)",
    "seta.update(listsetb)",
    "seta.update(setb)",
    "seta.update(dictb)",
    "seta.intersection_update(listsetb)",
    "seta.intersection_update(setb)",
    "seta.intersection_update(dictb)",
    "seta.symmetric_difference_update(listsetb)",
    "seta.symmetric_difference_update(setb)",
    "seta.symmetric_difference_update(dictb)",

    "for i in xrange(500): seta.add(i)",
    "for i in xrange(500): Seta.add(i)",
    "for i in xrange(500): dicta[i] = NONE",
    "for i in xrange(500): sadd(i)",
    "set(b)",
    "Set(b)",
    "dict.fromkeys(b, TRUE)",

    "set(listsetb)",
    "set(setb)",
    "set(dictb)",

    "for i in xrange(200): i in seta",
    "for i in xrange(200): i in Seta",
    "for i in xrange(200): i in dicta",

    "for i in xrange(200): dah(i)",
    "for i in xrange(200): dac(i)",
    "for i in xrange(200): sac(i)",

    "for i in xrange(200): seta.discard(i)",
    "for i in xrange(20):  Seta.discard(i)",
    "for i in xrange(200): dicta.pop(i, NONE)",

# Repeat first five to check validity of timings
    'list(seta)',
    'list(dicta)',
    'dicta.keys()',
    'list(listseta)',
    'list(setb)',

# Uniquification,
    'list(set(a))',
    'dict.fromkeys(a).keys()',
    'list(dict.fromkeys(a))',
    'list(set(b))',
    'dict.fromkeys(b).keys()',
    'list(dict.fromkeys(b))',
]

from time import time
import gc
start = time()

for stmt in stmts:
    gc.collect()
    print "%f\t%s" % (min(Timer(stmt, setup).repeat(9, 3500)), stmt)

end = time()
print end-start, "! total elapsed"
