# citing: https://www.reddit.com/r/Python/comments/jfx7wy/how_to_quickly_remove_duplicates_from_a_list/
# Let's make some duplicates (one million integers from 0 to 99)
from random import randrange
# Draw 1,000,000 integers from 0..99. With only 100 possible values the list
# is almost entirely repeats, which is what the de-duplication demos need.
DUPLICATES = [randrange(0, 100) for _draw in range(10**6)]
### METHOD 1 ###
# Not very efficient - Similar to "sort | uniq" but the original order is kept
# Walk the input once and keep only the first occurrence of each value, so
# the result is in first-seen order.
# Deliberately naive: `element not in unique` scans a list, so each check
# costs up to len(unique) comparisons.
unique = []
for element in DUPLICATES:
    if element not in unique:
        unique.append(element)
print(unique)
# Output (one example run; the order changes with the random input):
# [95, 62, 73, 27, 89, 88, 31, 50, 26, 97, 13, 18, 54, 64, 17, 94, 72, 65, 83, 14, 40, 91, 1, 21, 22, 52, 61, 15, 7, 80, 24, 42, 37, 96, 74, 9, 59, 44, 66, 20, 6, 84, 39, 81, 2, 67, 32, 77, 36, 5, 8, 47, 55, 82, 60, 35, 33, 75, 57, 68, 29, 11, 23, 86, 69, 99, 25, 78, 76, 19, 12, 92, 10, 90, 16, 3, 51, 4, 28, 0, 71, 46, 79, 85, 58, 48, 93, 56, 53, 98, 87, 30, 45, 63, 70, 38, 41, 43, 34, 49]
### METHOD 2 ###
# Very efficient - Similar to "sort | uniq"
# Passing the list through set() drops every repeated value; list() turns the
# result back into a list. Set iteration order is not guaranteed, so do not
# rely on the order of the result.
print(list(set(DUPLICATES)))
# This works because sets contain unique items by definition
# Output (sets are unordered; these small integers merely happen to come out sorted here):
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
### METHOD 3 ###
# Very efficient, and the original order is preserved
# Not sure which bash command this is similar to; the closest shell idiom
# is probably awk '!seen[$0]++'
# It resembles "sort | uniq" but with the original order preserved
# Sets are unordered, so what if we need to preserve the order?
# Use this dict.fromkeys() trick!
# dict.fromkeys() creates one key per distinct value, and dicts keep their
# keys in insertion order (guaranteed from Python 3.7), so the keys come back
# in first-seen order.
print(list(dict.fromkeys(DUPLICATES)))
# Output (one example run; the order changes with the random input):
# [95, 62, 73, 27, 89, 88, 31, 50, 26, 97, 13, 18, 54, 64, 17, 94, 72, 65, 83, 14, 40, 91, 1, 21, 22, 52, 61, 15, 7, 80, 24, 42, 37, 96, 74, 9, 59, 44, 66, 20, 6, 84, 39, 81, 2, 67, 32, 77, 36, 5, 8, 47, 55, 82, 60, 35, 33, 75, 57, 68, 29, 11, 23, 86, 69, 99, 25, 78, 76, 19, 12, 92, 10, 90, 16, 3, 51, 4, 28, 0, 71, 46, 79, 85, 58, 48, 93, 56, 53, 98, 87, 30, 45, 63, 70, 38, 41, 43, 34, 49]
### METHOD 4 ###
# Older Python - order-preserving de-duplication, like dict.fromkeys()
# The plain dict.fromkeys() trick relies on dicts keeping insertion order,
# which only holds on Python 3.6 and above (a language guarantee from 3.7)
# For Python 2.7 and 3.0-3.5, use OrderedDict, which always remembers the
# order in which keys were first inserted:
from collections import OrderedDict
print(list(OrderedDict.fromkeys(DUPLICATES)))
### METHOD 5 ###
# Collapse consecutive duplicates as they appear and show the count
# Similar to bash's "uniq -c"
# I will show you how to do "uniq -c" on a string
# Just change the input to a list and it will work on a list
# METHOD 5.1 #
from itertools import groupby
# NOTE: the name `input` shadows the builtin of the same name; it is kept
# because the later snippets in this script read it.
input = "Raaanndommmm Leetters"
# groupby() yields (value, run) for each run of consecutive equal items;
# the length of the run is the count that "uniq -c" would print.
for char, run in groupby(input):
    print(len(list(run)), char)
# Output:
# 1 R
# 3 a
# 2 n
# 1 d
# 1 o
# 4 m
# 1      <-- the space between the two words
# 1 L
# 2 e
# 2 t
# 1 e
# 1 r
# 1 s
# METHOD 5.2 #
# Or you can return a list for an output:
# Same run-length counting as the printing loop, but each "count value"
# string is collected into a list instead of being printed line by line.
l = []
for char, run in groupby(input):
    l.append(str(len(list(run))) + " " + str(char))
print(l)
# Output:
# ['1 R', '3 a', '2 n', '1 d', '1 o', '4 m', '1  ', '1 L', '2 e', '2 t', '1 e', '1 r', '1 s']
# METHOD 5.3 #
# Same solution as a one liner:
# One "count value" string per run of consecutive equal items.
print([f"{len(list(run))} {key}" for key, run in groupby(input)])
# Output:
# ['1 R', '3 a', '2 n', '1 d', '1 o', '4 m', '1  ', '1 L', '2 e', '2 t', '1 e', '1 r', '1 s']
### METHOD 6 ###
# Collapse consecutive duplicates but don't show the count
# METHOD 6.1 #
from itertools import groupby
# NOTE: the name `input` shadows the builtin of the same name; it is kept
# because the later snippets in this script read it.
input = "Raaanndommmm Leetters"
# Only the key of each run is needed here; the run itself is ignored, so
# every stretch of repeated characters prints exactly once.
for char, _run in groupby(input):
    print(char)
# Output:
# R
# a
# n
# d
# o
# m
#        <-- the space between the two words
# L
# e
# t
# e
# r
# s
# METHOD 6.2 #
# Collect the value of each run of consecutive equal items into a list
# instead of printing one per line; str() keeps the result a list of strings
# even when the input is a list of non-string items.
l = []
for char, _run in groupby(input):
    l.append(str(char))
print(l)
# Output:
# ['R', 'a', 'n', 'd', 'o', 'm', ' ', 'L', 'e', 't', 'e', 'r', 's']
# METHOD 6.3 #
# One-liner: the string form of each run's key, in order of appearance.
print([str(key) for key, _run in groupby(input)])
# Output:
# ['R', 'a', 'n', 'd', 'o', 'm', ' ', 'L', 'e', 't', 'e', 'r', 's']