''' I'm trying to create bins based on the 'Position' column on df.groupby('Chrom') object. I've been struggling for hours on the last part of my code.The purpose of the script is to retrieve the lowest pvalue (p) for each group of the groupby object. df_lowestP = data.loc[data.groupby('Bin_labels')['p'].idxmin()] raised the following error 'ValueError: attempt to get argmin of an empty sequence' I'm not sure how to identify the empty sequences and how to sort it out. Thank you in advance for helping me out.
'''
cols= ['Chrom', 'Position', 'Marker', 'p']
vals = [['5A', 70.86, 'wsnp_Ex_c2474_4619730', 0.952967047],\
['2D', 80.46, 'Excalibur_c48871_1330', 0.603140475],\
['5B', 21.6, 'BS00010491_51', 0.68806773],\
['1D', 7.47, 'BobWhite_c4303_524', 0.14664623599999999],\
['6B', 52.36, 'RAC875_c54904_460', 0.747116322],\
['1B', 56.92, 'BS00023044_51', 0.793591901],\
['1B', 82.21, 'Excalibur_c60931_1260', 0.10407994],\
['7B', 61.76, 'Kukri_c12822_132', 0.9914339959999999],\
['6B', 99.03, 'BS00069285_51', 0.591267434],\
['6B', 65.68, 'Tdurum_contig42414_612', 0.6802424490000001],\
['2A', 0.0, 'wsnp_Ex_c1962_3696626', 0.016681088],\
['2D', 88.72, 'BS00031459_51', 0.06155213599999999],\
['3B', 60.02, 'RAC875_c2787_470', 0.359986433],\
['2B', 89.94, 'Excalibur_c16646_1254', 0.548888715],\
['6D', 48.85, 'Excalibur_c8134_1319', 0.4144683],\
['5B', 9.03, 'wsnp_Ex_c2459_4591587', 0.64927464],\
['5B', 123.43, 'BS00021868_51', 0.478334741],\
['2A', 194.55, 'wsnp_CAP11_rep_c6137_2841945', 0.67702519],\
['2B', 97.48, 'RAC875_c46788_60', 0.461550513],\
['1A', 73.26, 'CAP7_c3269_236', 0.8397169459999999],\
['1B', 148.6, 'RAC875_c9082_267', 0.992387433],\
['3A', 158.94, 'RAC875_c3084_415', 0.35521176200000004],\
['2A', 77.73, 'Kukri_c7139_1248', 0.865599771],\
['4A', 55.63, 'wsnp_Ex_c829_1620518', 0.628465917],\
['4A', 61.94, 'Excalibur_c96303_224', 0.8707948609999999],\
['1A', 85.79, 'Excalibur_c13183_128', 0.811616063],\
['3A', 79.22, 'wsnp_Ex_c32003_40728918', 0.45561401100000004],\
['7B', 29.87, 'Excalibur_c54242_704', 0.165007274],\
['6B', 75.73, 'Kukri_c60169_54', 0.893133473],\
['7B', 94.94, 'BS00065981_51', 0.45571674],\
['6A', 64.26, 'GENE-3933_586', 0.974289783],\
['1B', 0.0, 'Kukri_c44369_131', 0.311861047],\
['5D', 9.36, 'IACX2960', 0.9434061220000001],\
['6D', 48.34, 'Ex_c12134_939', 0.456392529],\
['3B', 97.16, 'wsnp_Ex_c18915_27811736', 0.785250982],\
['5B', 166.0, 'BS00098520_51', 0.6439036220000001],\
['2B', 96.44, 'RAC875_rep_c111100_447', 0.063204845],\
['1A', 51.88, 'Kukri_c3150_341', 0.17333784600000002],\
['5A', 139.08, 'BS00010179_51', 0.8453566920000001],\
['1B', 126.13, 'wsnp_Ku_c18881_28259811', 0.653103781],\
['1B', 128.64, 'Excalibur_c8585_701', 0.103269216],\
['2D', 44.17, 'Excalibur_c40820_335', 0.08864491],\
['1B', 80.3, 'Kukri_c5335_2165', 0.567401681],\
['3A', 97.15, 'RAC875_c62012_382', 0.407825859],\
['4B', 52.36, 'GENE-0037_298', 0.014111186999999999],\
['5A', 79.55, 'Tdurum_contig59930_366', 0.117716542],\
['5B', 128.25, 'BS00028082_51', 0.24427141],\
['7B', 109.87, 'wsnp_Ku_rep_c103690_90365438', 0.670004615],\
['2D', 89.68, 'BS00066526_51', 0.025921416],\
['7A', 81.25, 'IAAV5328', 0.340900239]]
df = pd.DataFrame(vals, columns=cols)
for gp,data in df.groupby('Chrom'):
pos_range = data['Position'].max() - data['Position'].min()
inter_value = 5
min_value = int(np.floor(data['Position'].min()))
max_value = int(np.ceil(data['Position'].max()))
intervals = [i for i in range(min_value, max_value+inter_value, inter_value)] #creating bins boundaries
labels = ['Bin_' + str(i) for i in range(1, len(intervals))] #labelling bins
#creating bin_labels column
bin_labels = pd.cut(x=data['Position'],
bins=intervals,
labels=labels,
include_lowest=True)
# creating bin boundaries column
bin_range = pd.cut(x=data['Position'],
bins=intervals,
include_lowest=True)
data.insert(3,'Bin_labels',bin_labels)
data.insert(4,'Bin_range',bin_range)
#Retrieving a single marker with lowest P for each bin
df_lowestP = data.loc[data.groupby('Bin_labels')['p'].idxmin()] #Here is the trouble