Python remove duplicates similar to bash uniq + sort

# citing: https://www.reddit.com/r/Python/comments/jfx7wy/how_to_quickly_remove_duplicates_from_a_list/

# Let's make some duplicates (one million numbers between 0 and 99)

from random import randrange
# Sample data: one million random integers, each drawn from 0..99 inclusive.
DUPLICATES = [randrange(0, 100) for _ in range(10**6)]

### METHOD 1 ### 

# Not very efficient - Similar to "sort | uniq" but the original order is kept

# Keep the first occurrence of every value, in the order it was first seen.
# The membership test scans the whole `unique` list each time, which is why
# this approach is the slow one.
unique = []
for value in DUPLICATES:
    if value in unique:
        continue  # already recorded, skip the repeat
    unique.append(value)
print(unique)

# Output:
[95, 62, 73, 27, 89, 88, 31, 50, 26, 97, 13, 18, 54, 64, 17, 94, 72, 65, 83, 14, 40, 91, 1, 21, 22, 52, 61, 15, 7, 80, 24, 42, 37, 96, 74, 9, 59, 44, 66, 20, 6, 84, 39, 81, 2, 67, 32, 77, 36, 5, 8, 47, 55, 82, 60, 35, 33, 75, 57, 68, 29, 11, 23, 86, 69, 99, 25, 78, 76, 19, 12, 92, 10, 90, 16, 3, 51, 4, 28, 0, 71, 46, 79, 85, 58, 48, 93, 56, 53, 98, 87, 30, 45, 63, 70, 38, 41, 43, 34, 49]

### METHOD 2 ###

# Very efficient - Similar to "sort | uniq"

# set() drops the duplicates; sorted() supplies the "sort" half of "sort | uniq".
# A bare list(set(...)) has no guaranteed order (small ints only *look* sorted).
sorted(set(DUPLICATES))

# This works because sets contain unique items by definition

# Output:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]

### METHOD 3 ###

# Very efficient, and the original order is preserved
# Resembles "sort | uniq" but with the original order preserved
# (the closest bash equivalent is: awk '!seen[$0]++')
# Sets are unordered, so METHOD 2 loses the original order.
# When you need to preserve the order, use this dict.fromkeys() trick!
# dict keys are unique and remember insertion order, so unpacking the keys of
# dict.fromkeys() yields each value once, in first-seen order.
[*dict.fromkeys(DUPLICATES)]

# Output:
[95, 62, 73, 27, 89, 88, 31, 50, 26, 97, 13, 18, 54, 64, 17, 94, 72, 65, 83, 14, 40, 91, 1, 21, 22, 52, 61, 15, 7, 80, 24, 42, 37, 96, 74, 9, 59, 44, 66, 20, 6, 84, 39, 81, 2, 67, 32, 77, 36, 5, 8, 47, 55, 82, 60, 35, 33, 75, 57, 68, 29, 11, 23, 86, 69, 99, 25, 78, 76, 19, 12, 92, 10, 90, 16, 3, 51, 4, 28, 0, 71, 46, 79, 85, 58, 48, 93, 56, 53, 98, 87, 30, 45, 63, 70, 38, 41, 43, 34, 49]

### METHOD 4 ###

# Older Python - same result as METHOD 3 (duplicates removed, original order kept)
# The dict.fromkeys() trick in METHOD 3 only preserves order on Python 3.6 and above
# For Python 2.7 and 3.0-3.5, use OrderedDict:
# (the import must be on its own line, otherwise OrderedDict is undefined below)
from collections import OrderedDict
list(OrderedDict.fromkeys(DUPLICATES))

### METHOD 5 ###

# Crunch duplicates as they appear and show count
# Similar to bash's "uniq -c"
# I will show you how to do "uniq -c" on a string
# Just change the input to a list and it will work on a list

# METHOD 5.1 #

from itertools import groupby

# NOTE: the name `input` shadows the builtin; the later snippets read it, so it stays.
input = "Raaanndommmm Leetters"

# groupby() yields one (key, run) pair per stretch of consecutive equal items;
# counting the items in the run gives the "uniq -c" style count.
for char, run in groupby(input):
    run_length = sum(1 for _ in run)
    print(run_length, char)

# Output:
1 R
3 a
2 n
1 d
1 o
4 m
1
1 L
2 e
2 t
1 e
1 r
1 s

# METHOD 5.2 #

# Or you can return a list for an output:

# Collect one "<count> <item>" string per run of consecutive equal items.
l = []
for char, run in groupby(input):
    run_length = len(list(run))
    l.append(f"{run_length!s} {char!s}")
print(l)

# Output:
['1 R', '3 a', '2 n', '1 d', '1 o', '4 m', '1  ', '1 L', '2 e', '2 t', '1 e', '1 r', '1 s']

#  METHOD 5.3 #

# Same solution as a one liner:

# One "<count> <item>" string per run of consecutive equal items, built inline.
print([f"{len(list(run))!s} {char!s}" for char, run in groupby(input)])

# Output:
['1 R', '3 a', '2 n', '1 d', '1 o', '4 m', '1  ', '1 L', '2 e', '2 t', '1 e', '1 r', '1 s']

### METHOD 6 ###

# Crunch out duplicates but don't show count

# METHOD 6.1 #

from itertools import groupby

# NOTE: the name `input` shadows the builtin; the later snippets read it, so it stays.
input = "Raaanndommmm Leetters"

# Only the key of each run is needed; the run itself is ignored.
for char, _run in groupby(input):
    print(char)

# Output:
R
a
n
d
o
m

L
e
t
e
r
s

# METHOD 6.2 #

# Collect the key of every run of consecutive equal items, as strings.
l = []
for key, _run in groupby(input):
    l += [str(key)]
print(l)

# Output:
['R', 'a', 'n', 'd', 'o', 'm', ' ', 'L', 'e', 't', 'e', 'r', 's']

# METHOD 6.3 #

# The key of every run of consecutive equal items, as strings, built inline.
print([str(key) for key, _run in groupby(input)])
# Output:

['R', 'a', 'n', 'd', 'o', 'm', ' ', 'L', 'e', 't', 'e', 'r', 's']
infotinks

My Notes, Articles & Guides for Linux, Windows and Networking.

Python remove duplicates similar to bash uniq + sort

Leave a Reply Cancel reply