# python merge sort exploration.
# by Pegasus Epsilon <pegasus@pimpninjas.org>
# (C)2013-2023 No Rights Reserved (at the moment, anyway)
# I was told python 3 had greatly improved its speed and would significantly
# impact linear_merge_pop() below. This is sadly not the case.
# I still cannot recommend python, even disregarding the whitespace/formatting
# issue.
# I found the merge_sort_pop() method of sorted list merging copied on a random
# fucking teaching site on the web. I am incredibly disappointed. POP IS FUCKING
from __future__ import print_function
import random
import time
import dis
# the perly way...
def linear_merge_pop (l1, l2):
l = []
while l1 and l2: l.append(l1.pop(0) if l1[0] < l2[0] else l2.pop(0))
return l + l1 + l2
# using pop with an index is O(n) on the source lists.
# this makes linear_merge_pop O(2n) slow.
# so let's try without an index...
def linear_merge_insert (l1, l2):
l = []
while l1 and l2: l.insert(0, l1.pop() if l1[-1] > l2[-1] else l2.pop())
return l1 + l2 + l
# using insert this way is O(n) on the destination list.
# this makes linear_merge_insert O(n^2) slow. much worse than pop.
# what about a deque? those are fast, right?
from collections import deque
def linear_merge_deque (l1, l2):
d1 = deque(l1)
d2 = deque(l2)
l = []
while d1 and d2: l.append(d1.pop() if d1[0] < d2[0] else d2.pop())
# collections from python2 did not support concatenating deques
return l + list(d1) + list(d2)
# well sort of, but conversion times are nonzero, so you wind up losing overall.
# barely, especially compared to .pop(0) and .insert(0, ...), but still losing.
# the fastest way in python is to not modify the source lists at all.
def linear_merge_index (l1, l2):
lp1, lp2, ll1, ll2, l = 0, 0, len(l1), len(l2), []
while lp1 < ll1 and lp2 < ll2:
if l1[lp1] < l2[lp2]:
lp1 += 1
lp2 += 1
if lp1 < ll1: l.extend(l1[lp1:])
else: l.extend(l2[lp2:])
return l
# note that all of these sort times are significantly worse in python3 than
# python2 EXCEPT the index sort above, which is the same. The built-in sort
# method is still much faster in any version, so unless you absolutely have
# to, don't sort like this.
def merge_sort (fn, l):
ll = len(l)
if (ll > 1):
ll >>= 1
return fn(merge_sort(fn, l[:ll]), merge_sort(fn, l[ll:]))
return l
def merge_sort_pop (l):
merge_sort(linear_merge_pop, l)
def merge_sort_deque (l):
merge_sort(linear_merge_deque, l)
def merge_sort_insert (l):
merge_sort(linear_merge_insert, l)
def merge_sort_index (l):
merge_sort(linear_merge_index, l)
foo = [x for x in range(0, 200000)]
print("merge sort", len(foo), "shuffled unique items")
start = time.time()
bar = sorted(foo)
sort = time.time() - start
print("built-in sort (for reference) took", sort, "seconds")
bar = foo[:]
start = time.time()
bar = merge_sort_index(bar)
index = time.time() - start
print("index merge sort took", index, "seconds")
bar = foo[:]
start = time.time()
bar = merge_sort_pop(bar)
pop = time.time() - start
print("pop merge sort took", pop, "seconds")
print("pop merge seems to be about", (pop / index), "times slower than index merge")
bar = foo[:]
start = time.time()
bar = merge_sort_insert(bar)
insert = time.time() - start
print("insert merge sort took", insert, "seconds")
print("insert merge seems to be about", (insert / index), "times slower than index merge")
bar = foo[:]
start = time.time()
bar = merge_sort_deque(bar)
t_deque = time.time() - start
print("deque merge sort took", t_deque, "seconds")
print("deque merge seems to be about", (t_deque / index), "times slower than index merge")
r1 = random.randint(0, 1000)
r2 = random.randint(0, 1000)
l1 = [x for x in range(r1, r1 + 100000)]
l2 = [x for x in range(r2, r2 + 100000)]
print("merge two sorted lists of 100,000 items, with duplicates")
start = time.time()
linear_merge_index(l1, l2)
index = time.time() - start
print("index takes", index, "seconds")
print("len(l1) after fast:", len(l1), "(should be 100000)")
print("len(l2) after fast:", len(l2), "(should be 100000)")
start = time.time()
linear_merge_pop(l1, l2) # clobbers the input lists
pop = time.time() - start
print("pop takes", pop, "seconds")
print("len(l1) after slow:", len(l1), "(should be < 1000)")
print("len(l2) after slow:", len(l2), "(should be < 1000)")
print("pop seems to be about", (pop / index), "times slower than index")
print("sanity tests:")
print(linear_merge_index(['aa', 'xx', 'zz'], ['bb', 'cc', 'zz', 'zz']))
print(linear_merge_pop(['aa', 'xx', 'zz'], ['bb', 'cc', 'zz', 'zz']))
print(linear_merge_insert(['aa', 'xx', 'zz'], ['bb', 'cc', 'zz', 'zz']))
print(linear_merge_deque(['aa', 'xx', 'zz'], ['bb', 'cc', 'zz', 'zz']))
print(linear_merge_index(['aa', 'aa', 'aa'], ['zz']))
print(linear_merge_pop(['aa', 'aa', 'aa'], ['zz']))
print(linear_merge_index(['aa', 'aa', 'aa'], ['zz']))
print(linear_merge_deque(['aa', 'aa', 'aa'], ['zz']))