Fuller Web Development JavaScript, PHP, and Python Web Development by Braydon Fuller

Python Performance Part 1: Transforming Large Lists into Separate Smaller Lists

Goal

Write a fast Python script that will take a large list and break it up into smaller sub-lists based on a set size; such as transforming [a,b,c,d,e,f] into [[a,b],[c,d],[e,f]].

Attempt 1: Map/Reduce (0.93s)

#import a list of 247,213 integers
from oids import oids

def pre(a):
    """Wrap one item as the 3-tuple ``(new empty list, item, 0)``.

    This is the state shape that make_sets indexes into: position 0 is a
    list of sub-lists, position 1 is the item itself and position 2 is a
    counter starting at zero. Every call creates its own fresh list.
    """
    sets = []
    counter = 0
    return sets, a, counter

def make_sets(a, b, set_size=8):
    """Reducer step: file the pending item of ``a`` and queue the item of ``b``.

    Both arguments are 3-tuples ``(sets, item, counter)``.  ``a`` is the
    running state: ``a[0]`` is the list of sub-lists built so far, ``a[1]``
    is the item still waiting to be placed and ``a[2]`` counts the items
    added to the last sub-list after its first one.  Only ``b[1]`` is read
    from ``b``.

    Args:
        a: Running state tuple; ``a[0]`` is mutated in place.
        b: Tuple carrying the next item at index 1.
        set_size: Maximum number of items per sub-list (default 8).

    Returns:
        A new state tuple ``(a[0], b[1], counter)``.  Note that the item of
        ``b`` is only queued, not placed: after a full reduction the item of
        the final tuple is still pending at index 1 of the result.
    """
    sets = a[0]
    pending = a[1]
    filled = a[2]
    # Start a new sub-list when nothing exists yet or when the last one has
    # reached set_size items (its first item plus set_size - 1 additions).
    if filled == set_size - 1 or not sets:
        sets.append([pending])
        return (sets, b[1], 0)
    sets[-1].append(pending)
    return (sets, b[1], filled + 1)

from itertools import chain

# Fold the wrapped items into sub-lists.  make_sets always leaves the item of
# the final tuple pending instead of placing it, so a trailing None sentinel
# is chained on: the last real item gets placed and only the sentinel is left
# over.  Element 0 of the reduced state is the list of sub-lists; with an
# empty oids the single sentinel tuple is returned as-is and that list is [].
output = reduce(make_sets, map(pre, chain(oids, [None])))[0]

Times

real    0m0.935s
user    0m0.896s
sys     0m0.032s

real    0m0.948s
user    0m0.920s
sys     0m0.028s

real    0m0.929s
user    0m0.900s
sys     0m0.024s

Attempt 2: For-loop (0.41s)

from oids import oids

# Split oids, in order, into consecutive sub-lists of at most set_size items.
output = []
set_size = 8
count = 0  # number of items already in the current (last) sub-list
for oid in oids:
    # Open a new sub-list for the very first item, or once the current
    # sub-list is full.
    if count == set_size or not output:
        output.append([oid])
        # The new sub-list already holds this item; resetting to 0 here
        # would let every sub-list grow to set_size + 1 items.
        count = 1
    else:
        output[-1].append(oid)
        count += 1

Times

real    0m0.429s
user    0m0.404s
sys     0m0.024s

real    0m0.396s
user    0m0.384s
sys     0m0.012s

real    0m0.410s
user    0m0.396s
sys     0m0.012s

Attempt 3: Map (0.48s)

from oids import oids

# Module-level state shared with break_apart: the target sub-list size, the
# list of sub-lists built so far, and a list whose last entry break_apart
# reads as its running counter.
set_size = 8
output = []
count = [0]

def break_apart(a):
    """Place item ``a`` into the module-level ``output`` list of sub-lists.

    A new sub-list is started when ``output`` is empty or when the current
    sub-list already holds ``set_size`` items; otherwise ``a`` is appended
    to the last sub-list.  The last entry of the module-level ``count`` list
    is used as a mutable cell holding the size of the current sub-list, and
    is updated in place so ``count`` does not grow with the input.

    Args:
        a: The item to place.

    Returns:
        None; the function works through side effects on ``output`` and
        ``count``.
    """
    if count[-1] == set_size or not output:
        output.append([a])
        # The new sub-list already contains this item, so the counter
        # restarts at 1 (restarting at 0 produced set_size + 1 items).
        count[-1] = 1
    else:
        output[-1].append(a)
        count[-1] += 1

# Call break_apart on every item purely for its side effects on output and
# count; the value returned by map is discarded.
# NOTE(review): this relies on map being eager, as in Python 2 (which the
# builtin reduce used elsewhere in this file suggests).  On Python 3 map is
# lazy and this line alone would never call break_apart - confirm the target
# interpreter.
map(break_apart,oids)

Timing

real    0m0.484s
user    0m0.476s
sys     0m0.012s

real    0m0.483s
user    0m0.464s
sys     0m0.016s

real    0m0.482s
user    0m0.452s
sys     0m0.028s

Attempt 4: Reduce (0.34s)

from oids import oids

def seperate(a, b, length=8):
    """Reducer step that groups items into sub-lists of ``length`` items.

    Intended for ``reduce`` without an initial value.  On the first call
    ``a`` is still a raw item rather than the list of sub-lists; that case
    is detected by the failure of the list operations below and seeds the
    result.  On later calls ``a`` is the list of sub-lists, which is mutated
    in place and returned.

    NOTE(review): the first-call detection assumes raw items do not behave
    like a list of lists (for example integers or strings) - confirm for
    other item types.

    Args:
        a: The accumulated list of sub-lists, or the first raw item.
        b: The next item to place.
        length: Maximum number of items per sub-list (default 8).

    Returns:
        The list of sub-lists including ``b``.
    """
    try:
        if len(a[-1]) == length:
            a.append([b])
        else:
            a[-1].append(b)
        return a
    except (TypeError, AttributeError, IndexError):
        # ``a`` is the first raw item: indexing it, taking len() of the
        # result or calling append on it failed.  Only these errors are
        # expected; anything else (e.g. KeyboardInterrupt) must propagate.
        if length == 1:
            # A two-item seed would never match length == 1 and every later
            # item would pile into the same sub-list.
            return [[a], [b]]
        return [[a, b]]

# reduce() without an initial value only calls seperate when there are at
# least two items: it raises TypeError on an empty sequence and returns a
# lone item unchanged.  Those two cases are chunked directly so the result is
# always a list of sub-lists.
if len(oids) < 2:
    oids = [list(oids)] if oids else []
else:
    oids = reduce(seperate, oids)

Timing

real    0m0.323s
user    0m0.308s
sys     0m0.016s

real    0m0.329s
user    0m0.300s
sys     0m0.028s

real    0m0.353s
user    0m0.332s
sys     0m0.020s

No Comments Yet


There are no comments yet. You could be the first!

Leave a Comment