If you want to actually code up a solution in Python to get practice, here's one way:
# Positions within each data tuple whose values, taken together, form the key
# used to decide whether two tuples count as repeats of one another.
# Set this to whatever indices you would like (or even all of them)!
key_indices = [1, 4]
def make_key(tpl, ki):
    """Build a hashable key from selected positions of a sequence.

    Args:
        tpl: The indexable record (e.g. a tuple) to take values from.
        ki: An iterable of indices into ``tpl``; the key holds the values
            found at these indices, in the order the indices are given.

    Returns:
        A tuple of the selected values. An empty ``ki`` yields ``()``.

    Raises:
        IndexError: If an index in ``ki`` is out of range for ``tpl``.
    """
    # Must be a tuple rather than a list: lists are unhashable and so cannot
    # be used as dict keys.
    return tuple(tpl[i] for i in ki)
# Sample records. Rows marked "Repeated" are exact copies of the row before
# them, so they share that row's key for any choice of key indices.
data = [
    ('1', '200', '300', '500', '2015-04-25 7:00:00'),
    ('1', '200', '500', '500', '2015-04-26 8:00:00'),
    ('1', '200', '500', '500', '2015-04-26 8:00:00'),  # Repeated
    ('1', '200', '900', '500', '2015-04-27 9:00:00'),
    ('1', '200', '300', '500', '2015-04-28 17:00:00'),
    ('1', '200', '300', '500', '2015-04-28 17:00:00'),  # Repeated
]

# the data structure that we'll use to remember where we've seen keys before:
# maps each key to the list of row indices at which that key has occurred
memory = {}
# indices of rows whose key had already been seen at an earlier index
duplicates = set()

for i, row in enumerate(data):
    # make the key for comparison
    k = make_key(row, key_indices)
    # find out where we've seen this key before; setdefault stores a fresh
    # empty list the first time a key appears, so no write-back is needed
    locations = memory.setdefault(k, [])
    # note that we have now seen this key at location i
    locations.append(i)
    # any occurrence after the first one is a duplicate
    if len(locations) > 1:
        duplicates.add(i)

# sorted() rather than list(): set iteration order is not guaranteed, and
# sorting makes the report deterministic
print("Duplicate values found at: {}".format(sorted(duplicates)))

# and if you want to know which keys were duplicated where?
for k, locs in memory.items():
    if len(locs) > 1:
        print("{}: {}".format(k, locs))
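Output (the order of the two per-key lines depends on the Python version; on Python 3.7+ dicts keep insertion order, so the `[1, 2]` line is printed first):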
Duplicate values found at: [2, 5]
('200', '2015-04-28 17:00:00'): [4, 5]
('200', '2015-04-26 8:00:00'): [1, 2]