2

I cannot figure this out. Why should the order of `group_A` matter in this situation? `Ar_groupA` and `Ar_groupB` are indexed appropriately and are accessed in the correct order.

I'm using pandas v0.24.2

Please help me understand why the order of the data in the groupings matters at all.

from collections import *
import itertools

def pairwise_logfc(df_data, group_A, group_B):
    """Build a table of log2 differences (group_B row minus group_A row).

    df_data is log2-transformed as a whole, the rows labelled by group_A and
    group_B are pulled out as arrays, and one difference row ``v - u`` is
    produced for every (group_A row u, group_B row v) combination. The result
    is a DataFrame indexed by a MultiIndex named ["group_B", "group_A"] with
    the same columns as df_data.

    NOTE(review): the difference rows are appended with group_A as the OUTER
    loop, while the labels come from itertools.product(group_B, group_A),
    which varies group_A FASTEST. The two sequences enumerate the pairs in
    different orders, so a row's values generally do not belong to the pair
    named by its index label.
    """
    # Init: work on a copy and remember the column labels for the output
    X = df_data.copy()
    attr_labels = X.columns

    # Log Transform (applied to every row of the frame, not just the groups)
    df_log = np.log2(X)

    # Groups: plain arrays whose row order follows group_A / group_B as passed in
    Ar_groupA = df_log.loc[group_A,:].values
    Ar_groupB = df_log.loc[group_B,:].values

    # Pairwise profiles: emitted A-major -> (A0,B0), (A0,B1), ..., (A1,B0), ...
    logfc_profiles = list()
    for i in  range(len(group_A)):
        u = Ar_groupA[i,:]
        for j in range(len(group_B)):
            v = Ar_groupB[j,:]
            logfc_profiles.append(v - u)

    # Labels: emitted B-major -> (B0,A0), (B0,A1), ..., (B1,A0), ...
    # NOTE(review): this is not the order in which the rows above were appended.
    groups = (group_B, group_A)
    labels = [*itertools.product(*groups)]
    return pd.DataFrame(logfc_profiles, index=pd.MultiIndex.from_tuples(labels,names=["group_B", "group_A"]), columns=attr_labels)

# Load data
data = OrderedDict([('sepal_length', OrderedDict([('iris_0', 5.1), ('iris_1', 4.9), ('iris_10', 5.4), ('iris_100', 6.3), ('iris_101', 5.8), ('iris_102', 7.1), ('iris_103', 6.3), ('iris_104', 6.5), ('iris_105', 7.6), ('iris_106', 4.9), ('iris_107', 7.3), ('iris_108', 6.7), ('iris_109', 7.2), ('iris_11', 4.8), ('iris_110', 6.5), ('iris_111', 6.4), ('iris_112', 6.8), ('iris_113', 5.7), ('iris_114', 5.8), ('iris_115', 6.4), ('iris_116', 6.5), ('iris_117', 7.7), ('iris_118', 7.7), ('iris_119', 6.0), ('iris_12', 4.8), ('iris_120', 6.9), ('iris_121', 5.6), ('iris_122', 7.7), ('iris_123', 6.3), ('iris_124', 6.7), ('iris_125', 7.2), ('iris_126', 6.2), ('iris_127', 6.1), ('iris_128', 6.4), ('iris_129', 7.2), ('iris_13', 4.3), ('iris_130', 7.4), ('iris_131', 7.9), ('iris_132', 6.4), ('iris_133', 6.3), ('iris_134', 6.1), ('iris_135', 7.7), ('iris_136', 6.3), ('iris_137', 6.4), ('iris_138', 6.0), ('iris_139', 6.9), ('iris_14', 5.8), ('iris_140', 6.7), ('iris_141', 6.9), ('iris_142', 5.8), ('iris_143', 6.8), ('iris_144', 6.7), ('iris_145', 6.7), ('iris_146', 6.3), ('iris_147', 6.5), ('iris_148', 6.2), ('iris_149', 5.9), ('iris_15', 5.7), ('iris_16', 5.4), ('iris_17', 5.1), ('iris_18', 5.7), ('iris_19', 5.1), ('iris_2', 4.7), ('iris_20', 5.4), ('iris_21', 5.1), ('iris_22', 4.6), ('iris_23', 5.1), ('iris_24', 4.8), ('iris_25', 5.0), ('iris_26', 5.0), ('iris_27', 5.2), ('iris_28', 5.2), ('iris_29', 4.7), ('iris_3', 4.6), ('iris_30', 4.8), ('iris_31', 5.4), ('iris_32', 5.2), ('iris_33', 5.5), ('iris_34', 4.9), ('iris_35', 5.0), ('iris_36', 5.5), ('iris_37', 4.9), ('iris_38', 4.4), ('iris_39', 5.1), ('iris_4', 5.0), ('iris_40', 5.0), ('iris_41', 4.5), ('iris_42', 4.4), ('iris_43', 5.0), ('iris_44', 5.1), ('iris_45', 4.8), ('iris_46', 5.1), ('iris_47', 4.6), ('iris_48', 5.3), ('iris_49', 5.0), ('iris_5', 5.4), ('iris_50', 7.0), ('iris_51', 6.4), ('iris_52', 6.9), ('iris_53', 5.5), ('iris_54', 6.5), ('iris_55', 5.7), ('iris_56', 6.3), ('iris_57', 4.9), ('iris_58', 6.6), ('iris_59', 
5.2), ('iris_6', 4.6), ('iris_60', 5.0), ('iris_61', 5.9), ('iris_62', 6.0), ('iris_63', 6.1), ('iris_64', 5.6), ('iris_65', 6.7), ('iris_66', 5.6), ('iris_67', 5.8), ('iris_68', 6.2), ('iris_69', 5.6), ('iris_7', 5.0), ('iris_70', 5.9), ('iris_71', 6.1), ('iris_72', 6.3), ('iris_73', 6.1), ('iris_74', 6.4), ('iris_75', 6.6), ('iris_76', 6.8), ('iris_77', 6.7), ('iris_78', 6.0), ('iris_79', 5.7), ('iris_8', 4.4), ('iris_80', 5.5), ('iris_81', 5.5), ('iris_82', 5.8), ('iris_83', 6.0), ('iris_84', 5.4), ('iris_85', 6.0), ('iris_86', 6.7), ('iris_87', 6.3), ('iris_88', 5.6), ('iris_89', 5.5), ('iris_9', 4.9), ('iris_90', 5.5), ('iris_91', 6.1), ('iris_92', 5.8), ('iris_93', 5.0), ('iris_94', 5.6), ('iris_95', 5.7), ('iris_96', 5.7), ('iris_97', 6.2), ('iris_98', 5.1), ('iris_99', 5.7)])), ('sepal_width', OrderedDict([('iris_0', 3.5), ('iris_1', 3.0), ('iris_10', 3.7), ('iris_100', 3.3), ('iris_101', 2.7), ('iris_102', 3.0), ('iris_103', 2.9), ('iris_104', 3.0), ('iris_105', 3.0), ('iris_106', 2.5), ('iris_107', 2.9), ('iris_108', 2.5), ('iris_109', 3.6), ('iris_11', 3.4), ('iris_110', 3.2), ('iris_111', 2.7), ('iris_112', 3.0), ('iris_113', 2.5), ('iris_114', 2.8), ('iris_115', 3.2), ('iris_116', 3.0), ('iris_117', 3.8), ('iris_118', 2.6), ('iris_119', 2.2), ('iris_12', 3.0), ('iris_120', 3.2), ('iris_121', 2.8), ('iris_122', 2.8), ('iris_123', 2.7), ('iris_124', 3.3), ('iris_125', 3.2), ('iris_126', 2.8), ('iris_127', 3.0), ('iris_128', 2.8), ('iris_129', 3.0), ('iris_13', 3.0), ('iris_130', 2.8), ('iris_131', 3.8), ('iris_132', 2.8), ('iris_133', 2.8), ('iris_134', 2.6), ('iris_135', 3.0), ('iris_136', 3.4), ('iris_137', 3.1), ('iris_138', 3.0), ('iris_139', 3.1), ('iris_14', 4.0), ('iris_140', 3.1), ('iris_141', 3.1), ('iris_142', 2.7), ('iris_143', 3.2), ('iris_144', 3.3), ('iris_145', 3.0), ('iris_146', 2.5), ('iris_147', 3.0), ('iris_148', 3.4), ('iris_149', 3.0), ('iris_15', 4.4), ('iris_16', 3.9), ('iris_17', 3.5), ('iris_18', 3.8), ('iris_19', 3.8), 
('iris_2', 3.2), ('iris_20', 3.4), ('iris_21', 3.7), ('iris_22', 3.6), ('iris_23', 3.3), ('iris_24', 3.4), ('iris_25', 3.0), ('iris_26', 3.4), ('iris_27', 3.5), ('iris_28', 3.4), ('iris_29', 3.2), ('iris_3', 3.1), ('iris_30', 3.1), ('iris_31', 3.4), ('iris_32', 4.1), ('iris_33', 4.2), ('iris_34', 3.1), ('iris_35', 3.2), ('iris_36', 3.5), ('iris_37', 3.6), ('iris_38', 3.0), ('iris_39', 3.4), ('iris_4', 3.6), ('iris_40', 3.5), ('iris_41', 2.3), ('iris_42', 3.2), ('iris_43', 3.5), ('iris_44', 3.8), ('iris_45', 3.0), ('iris_46', 3.8), ('iris_47', 3.2), ('iris_48', 3.7), ('iris_49', 3.3), ('iris_5', 3.9), ('iris_50', 3.2), ('iris_51', 3.2), ('iris_52', 3.1), ('iris_53', 2.3), ('iris_54', 2.8), ('iris_55', 2.8), ('iris_56', 3.3), ('iris_57', 2.4), ('iris_58', 2.9), ('iris_59', 2.7), ('iris_6', 3.4), ('iris_60', 2.0), ('iris_61', 3.0), ('iris_62', 2.2), ('iris_63', 2.9), ('iris_64', 2.9), ('iris_65', 3.1), ('iris_66', 3.0), ('iris_67', 2.7), ('iris_68', 2.2), ('iris_69', 2.5), ('iris_7', 3.4), ('iris_70', 3.2), ('iris_71', 2.8), ('iris_72', 2.5), ('iris_73', 2.8), ('iris_74', 2.9), ('iris_75', 3.0), ('iris_76', 2.8), ('iris_77', 3.0), ('iris_78', 2.9), ('iris_79', 2.6), ('iris_8', 2.9), ('iris_80', 2.4), ('iris_81', 2.4), ('iris_82', 2.7), ('iris_83', 2.7), ('iris_84', 3.0), ('iris_85', 3.4), ('iris_86', 3.1), ('iris_87', 2.3), ('iris_88', 3.0), ('iris_89', 2.5), ('iris_9', 3.1), ('iris_90', 2.6), ('iris_91', 3.0), ('iris_92', 2.6), ('iris_93', 2.3), ('iris_94', 2.7), ('iris_95', 3.0), ('iris_96', 2.9), ('iris_97', 2.9), ('iris_98', 2.5), ('iris_99', 2.8)])), ('petal_length', OrderedDict([('iris_0', 1.4), ('iris_1', 1.4), ('iris_10', 1.5), ('iris_100', 6.0), ('iris_101', 5.1), ('iris_102', 5.9), ('iris_103', 5.6), ('iris_104', 5.8), ('iris_105', 6.6), ('iris_106', 4.5), ('iris_107', 6.3), ('iris_108', 5.8), ('iris_109', 6.1), ('iris_11', 1.6), ('iris_110', 5.1), ('iris_111', 5.3), ('iris_112', 5.5), ('iris_113', 5.0), ('iris_114', 5.1), ('iris_115', 5.3), ('iris_116', 
5.5), ('iris_117', 6.7), ('iris_118', 6.9), ('iris_119', 5.0), ('iris_12', 1.4), ('iris_120', 5.7), ('iris_121', 4.9), ('iris_122', 6.7), ('iris_123', 4.9), ('iris_124', 5.7), ('iris_125', 6.0), ('iris_126', 4.8), ('iris_127', 4.9), ('iris_128', 5.6), ('iris_129', 5.8), ('iris_13', 1.1), ('iris_130', 6.1), ('iris_131', 6.4), ('iris_132', 5.6), ('iris_133', 5.1), ('iris_134', 5.6), ('iris_135', 6.1), ('iris_136', 5.6), ('iris_137', 5.5), ('iris_138', 4.8), ('iris_139', 5.4), ('iris_14', 1.2), ('iris_140', 5.6), ('iris_141', 5.1), ('iris_142', 5.1), ('iris_143', 5.9), ('iris_144', 5.7), ('iris_145', 5.2), ('iris_146', 5.0), ('iris_147', 5.2), ('iris_148', 5.4), ('iris_149', 5.1), ('iris_15', 1.5), ('iris_16', 1.3), ('iris_17', 1.4), ('iris_18', 1.7), ('iris_19', 1.5), ('iris_2', 1.3), ('iris_20', 1.7), ('iris_21', 1.5), ('iris_22', 1.0), ('iris_23', 1.7), ('iris_24', 1.9), ('iris_25', 1.6), ('iris_26', 1.6), ('iris_27', 1.5), ('iris_28', 1.4), ('iris_29', 1.6), ('iris_3', 1.5), ('iris_30', 1.6), ('iris_31', 1.5), ('iris_32', 1.5), ('iris_33', 1.4), ('iris_34', 1.5), ('iris_35', 1.2), ('iris_36', 1.3), ('iris_37', 1.4), ('iris_38', 1.3), ('iris_39', 1.5), ('iris_4', 1.4), ('iris_40', 1.3), ('iris_41', 1.3), ('iris_42', 1.3), ('iris_43', 1.6), ('iris_44', 1.9), ('iris_45', 1.4), ('iris_46', 1.6), ('iris_47', 1.4), ('iris_48', 1.5), ('iris_49', 1.4), ('iris_5', 1.7), ('iris_50', 4.7), ('iris_51', 4.5), ('iris_52', 4.9), ('iris_53', 4.0), ('iris_54', 4.6), ('iris_55', 4.5), ('iris_56', 4.7), ('iris_57', 3.3), ('iris_58', 4.6), ('iris_59', 3.9), ('iris_6', 1.4), ('iris_60', 3.5), ('iris_61', 4.2), ('iris_62', 4.0), ('iris_63', 4.7), ('iris_64', 3.6), ('iris_65', 4.4), ('iris_66', 4.5), ('iris_67', 4.1), ('iris_68', 4.5), ('iris_69', 3.9), ('iris_7', 1.5), ('iris_70', 4.8), ('iris_71', 4.0), ('iris_72', 4.9), ('iris_73', 4.7), ('iris_74', 4.3), ('iris_75', 4.4), ('iris_76', 4.8), ('iris_77', 5.0), ('iris_78', 4.5), ('iris_79', 3.5), ('iris_8', 1.4), ('iris_80', 3.8), 
('iris_81', 3.7), ('iris_82', 3.9), ('iris_83', 5.1), ('iris_84', 4.5), ('iris_85', 4.5), ('iris_86', 4.7), ('iris_87', 4.4), ('iris_88', 4.1), ('iris_89', 4.0), ('iris_9', 1.5), ('iris_90', 4.4), ('iris_91', 4.6), ('iris_92', 4.0), ('iris_93', 3.3), ('iris_94', 4.2), ('iris_95', 4.2), ('iris_96', 4.2), ('iris_97', 4.3), ('iris_98', 3.0), ('iris_99', 4.1)])), ('petal_width', OrderedDict([('iris_0', 0.2), ('iris_1', 0.2), ('iris_10', 0.2), ('iris_100', 2.5), ('iris_101', 1.9), ('iris_102', 2.1), ('iris_103', 1.8), ('iris_104', 2.2), ('iris_105', 2.1), ('iris_106', 1.7), ('iris_107', 1.8), ('iris_108', 1.8), ('iris_109', 2.5), ('iris_11', 0.2), ('iris_110', 2.0), ('iris_111', 1.9), ('iris_112', 2.1), ('iris_113', 2.0), ('iris_114', 2.4), ('iris_115', 2.3), ('iris_116', 1.8), ('iris_117', 2.2), ('iris_118', 2.3), ('iris_119', 1.5), ('iris_12', 0.1), ('iris_120', 2.3), ('iris_121', 2.0), ('iris_122', 2.0), ('iris_123', 1.8), ('iris_124', 2.1), ('iris_125', 1.8), ('iris_126', 1.8), ('iris_127', 1.8), ('iris_128', 2.1), ('iris_129', 1.6), ('iris_13', 0.1), ('iris_130', 1.9), ('iris_131', 2.0), ('iris_132', 2.2), ('iris_133', 1.5), ('iris_134', 1.4), ('iris_135', 2.3), ('iris_136', 2.4), ('iris_137', 1.8), ('iris_138', 1.8), ('iris_139', 2.1), ('iris_14', 0.2), ('iris_140', 2.4), ('iris_141', 2.3), ('iris_142', 1.9), ('iris_143', 2.3), ('iris_144', 2.5), ('iris_145', 2.3), ('iris_146', 1.9), ('iris_147', 2.0), ('iris_148', 2.3), ('iris_149', 1.8), ('iris_15', 0.4), ('iris_16', 0.4), ('iris_17', 0.3), ('iris_18', 0.3), ('iris_19', 0.3), ('iris_2', 0.2), ('iris_20', 0.2), ('iris_21', 0.4), ('iris_22', 0.2), ('iris_23', 0.5), ('iris_24', 0.2), ('iris_25', 0.2), ('iris_26', 0.4), ('iris_27', 0.2), ('iris_28', 0.2), ('iris_29', 0.2), ('iris_3', 0.2), ('iris_30', 0.2), ('iris_31', 0.4), ('iris_32', 0.1), ('iris_33', 0.2), ('iris_34', 0.2), ('iris_35', 0.2), ('iris_36', 0.2), ('iris_37', 0.1), ('iris_38', 0.2), ('iris_39', 0.2), ('iris_4', 0.2), ('iris_40', 0.3), ('iris_41', 
0.3), ('iris_42', 0.2), ('iris_43', 0.6), ('iris_44', 0.4), ('iris_45', 0.3), ('iris_46', 0.2), ('iris_47', 0.2), ('iris_48', 0.2), ('iris_49', 0.2), ('iris_5', 0.4), ('iris_50', 1.4), ('iris_51', 1.5), ('iris_52', 1.5), ('iris_53', 1.3), ('iris_54', 1.5), ('iris_55', 1.3), ('iris_56', 1.6), ('iris_57', 1.0), ('iris_58', 1.3), ('iris_59', 1.4), ('iris_6', 0.3), ('iris_60', 1.0), ('iris_61', 1.5), ('iris_62', 1.0), ('iris_63', 1.4), ('iris_64', 1.3), ('iris_65', 1.4), ('iris_66', 1.5), ('iris_67', 1.0), ('iris_68', 1.5), ('iris_69', 1.1), ('iris_7', 0.2), ('iris_70', 1.8), ('iris_71', 1.3), ('iris_72', 1.5), ('iris_73', 1.2), ('iris_74', 1.3), ('iris_75', 1.4), ('iris_76', 1.4), ('iris_77', 1.7), ('iris_78', 1.5), ('iris_79', 1.0), ('iris_8', 0.2), ('iris_80', 1.1), ('iris_81', 1.0), ('iris_82', 1.2), ('iris_83', 1.6), ('iris_84', 1.5), ('iris_85', 1.6), ('iris_86', 1.5), ('iris_87', 1.3), ('iris_88', 1.3), ('iris_89', 1.3), ('iris_9', 0.1), ('iris_90', 1.2), ('iris_91', 1.4), ('iris_92', 1.2), ('iris_93', 1.0), ('iris_94', 1.3), ('iris_95', 1.2), ('iris_96', 1.3), ('iris_97', 1.3), ('iris_98', 1.1), ('iris_99', 1.3)]))]) 

# Build the frame from the nested mapping, then take the first five row
# labels as group A and the last ten row labels as group B.
X_iris = pd.DataFrame(data)
idx_groupA = X_iris.index[:5]
idx_groupB = X_iris.index[-10:]



# group_A passed in ascending label order; look up the (iris_90, iris_0) pair.
pairwise_logfc(X_iris, sorted(idx_groupA), idx_groupB).loc[("iris_90", "iris_0")]
# sepal_length    0.108934
# sepal_width    -0.428843
# petal_length    1.652077
# petal_width     2.584963
# Name: (iris_90, iris_0), dtype: float64

# Same labels with group_A reversed; the same lookup key now yields
# different values, which is the behaviour being asked about.
pairwise_logfc(X_iris, sorted(idx_groupA)[::-1], idx_groupB).loc[("iris_90", "iris_0")]
# sepal_length   -0.050626
# sepal_width     0.000000
# petal_length   -0.280108
# petal_width    -0.547488
# Name: (iris_90, iris_0), dtype: float64
O.rka
  • 29,847
  • 68
  • 194
  • 309
  • Sorry, I'm still a little confused. When I do `df_log.loc[group_A,:]` each indice should be the same as `group_A` so `Ar_groupA[i,:]` should be represented by the ith element of `group_A`? – O.rka May 13 '19 at 20:04
  • I was expecting them to be the same b/c of this line: `labels = [*itertools.product(*groups)]`. I thought that the `group_A` and `group_B` would be properly combined. – O.rka May 13 '19 at 20:09
  • I feel that I may either be miscommunicating or missing a crucial piece of logic. `df_log` is a `pd.DataFrame` and has the original index of `X`. `df_log.loc[group_A,:].values` should have the order of `group_A`. The `i_th` element of `df_log.loc[group_A,:]` should be the same as the ith element of `Ar_groupA`. The label that corresponds to both should be `group_A[i]`. Since `itertools.product` is not sorted and is dependent on both input arrays (as expected), it should be the same whether or not I sorted `idx_groupA` before giving it to the function as `group_A`. Right? Thank you. – O.rka May 13 '19 at 20:28

1 Answers1

1

Update. Get rid of the loops in "Pairwise profiles" altogether. We can use numpy broadcasting to do the subtraction in one line :D.

import numpy as np

def pairwise_logfc(df_data, group_A, group_B):
    """Return the log2 fold change of every group_B row against every group_A row.

    Parameters
    ----------
    df_data : pd.DataFrame
        Numeric table; rows are looked up by label. log2 is applied to the
        selected rows, so non-positive values produce NaN / -inf.
    group_A : iterable of row labels
        Reference rows (subtracted).
    group_B : iterable of row labels
        Rows the reference is subtracted from.

    Returns
    -------
    pd.DataFrame
        One row per (group_B, group_A) pair holding ``log2(B) - log2(A)``.
        The index is a MultiIndex named ["group_B", "group_A"] with group_B
        varying slowest; the columns are those of ``df_data``. If either
        group is empty the result has zero rows.
    """
    # Materialize once: the labels feed both the row lookup and the index,
    # so a one-shot iterable would otherwise be exhausted after first use.
    group_A = list(group_A)
    group_B = list(group_B)

    # Transform only the rows that are actually used; no copy of the whole
    # frame is needed because nothing here mutates df_data.
    Ar_groupA = np.log2(df_data.loc[group_A, :]).values
    Ar_groupB = np.log2(df_data.loc[group_B, :]).values

    # Broadcasting: (n_B, 1, k) - (1, n_A, k) -> (n_B, n_A, k).
    diffs = Ar_groupB[:, None, :] - Ar_groupA[None, :, :]
    # Flatten B-major. The explicit shape (rather than vstack or -1) also
    # works when a group is empty.
    logfc_profiles = diffs.reshape(
        len(Ar_groupB) * len(Ar_groupA), df_data.shape[1]
    )

    # from_product enumerates pairs in the same B-major order as the rows
    # above, and unlike from_tuples it accepts empty inputs.
    index = pd.MultiIndex.from_product(
        [group_B, group_A], names=["group_B", "group_A"]
    )
    return pd.DataFrame(logfc_profiles, index=index, columns=df_data.columns)

# group_A in ascending label order.
pairwise_logfc(X_iris, sorted(idx_groupA), idx_groupB).loc[("iris_90", "iris_0")]
#sepal_length    0.108934
#sepal_width    -0.428843
#petal_length    1.652077
#petal_width     2.584963
#Name: (iris_90, iris_0), dtype: float64

# group_A reversed: the lookup now returns the same values as above.
pairwise_logfc(X_iris, sorted(idx_groupA)[::-1], idx_groupB).loc[("iris_90", "iris_0")]
#sepal_length    0.108934
#sepal_width    -0.428843
#petal_length    1.652077
#petal_width     2.584963
#Name: (iris_90, iris_0), dtype: float64

Original Fix:

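You need to change the loop order in your "Pairwise profiles" logic. In your original, the outer loop is over A, but the labels are built with `itertools.product(group_B, group_A)`, which varies A fastest. The rows and the labels therefore enumerate the pairs in different orders, so the outer loop needs to be over B (with A as the inner loop).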

# Pairwise profiles
# group_B is the outer loop and group_A the inner one, so rows are appended
# in the same order that itertools.product(group_B, group_A) yields labels.
logfc_profiles = list()
for i in  range(len(group_B)):
    u = Ar_groupB[i,:]
    for j in range(len(group_A)):
        v = Ar_groupA[j,:]
        # u is the group_B row, v the group_A row: the difference is B minus A
        logfc_profiles.append(u - v)
ALollz
  • 57,915
  • 7
  • 66
  • 89
  • 1
    @O.rka See the update for a nifty way to do the subtraction. – ALollz May 13 '19 at 21:36
  • 1
    Wow, on my dataset the wall time dropped from 14.5s to 4.96s with the `np.newaxis`. Can you describe a little how that can do the same thing as what we were doing before with the for loops? – O.rka May 14 '19 at 16:19
  • 1
    @O.rka, that command is just a fancy alias for `None`, but the result is to add a new axis to your array. So if it's originally (10,4) it becomes (10,1,4). This allows us to use [broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) during a simple subtraction (10,1,4) - (5,4) such that we end up having a single vectorized subtraction instead of loops. [This question](https://stackoverflow.com/questions/33677183/subtracting-numpy-arrays-of-different-shape-efficiently) should be informative. `vstack` then stacks (10,5,4) to (50,4), 2D, so we can make a `DataFrame` – ALollz May 14 '19 at 16:34