I am extracting a table row and the corresponding row from all referenced tables via SQLAlchemy.
Given the following object structure:
class DNAExtractionProtocol(Base):
__tablename__ = 'dna_extraction_protocols'
id = Column(Integer, primary_key=True)
code = Column(String, unique=True)
name = Column(String)
sample_mass = Column(Float)
mass_unit_id = Column(String, ForeignKey('measurement_units.id'))
mass_unit = relationship("MeasurementUnit", foreign_keys=[mass_unit_id])
digestion_buffer_id = Column(String, ForeignKey("solutions.id"))
digestion_buffer = relationship("Solution", foreign_keys=[digestion_buffer_id])
digestion_buffer_volume = Column(Float)
digestion_id = Column(Integer, ForeignKey("incubations.id"))
digestion = relationship("Incubation", foreign_keys=[digestion_id])
lysis_buffer_id = Column(String, ForeignKey("solutions.id"))
lysis_buffer = relationship("Solution", foreign_keys=[lysis_buffer_id])
lysis_buffer_volume = Column(Float)
lysis_id = Column(Integer, ForeignKey("incubations.id"))
lysis = relationship("Incubation", foreign_keys=[lysis_id])
proteinase_id = Column(String, ForeignKey("solutions.id"))
proteinase = relationship("Solution", foreign_keys=[proteinase_id])
proteinase_volume = Column(Float)
inactivation_id = Column(Integer, ForeignKey("incubations.id"))
inactivation = relationship("Incubation", foreign_keys=[inactivation_id])
cooling_id = Column(Integer, ForeignKey("incubations.id"))
cooling = relationship("Incubation", foreign_keys=[cooling_id])
centrifugation_id = Column(Integer, ForeignKey("incubations.id"))
centrifugation = relationship("Incubation", foreign_keys=[centrifugation_id])
volume_unit_id = Column(String, ForeignKey('measurement_units.id'))
volume_unit = relationship("MeasurementUnit", foreign_keys=[volume_unit_id])
I am using:
sql_query = session.query(DNAExtractionProtocol).options(Load(DNAExtractionProtocol).joinedload("*")).filter(DNAExtractionProtocol.code == code)
for item in sql_query:
pass
mystring = str(sql_query)
mydf = pd.read_sql_query(mystring,engine,params=[code])
print(mydf.columns)
This gives me:
Index([u'dna_extraction_protocols_id', u'dna_extraction_protocols_code',
u'dna_extraction_protocols_name',
u'dna_extraction_protocols_sample_mass',
u'dna_extraction_protocols_mass_unit_id',
u'dna_extraction_protocols_digestion_buffer_id',
u'dna_extraction_protocols_digestion_buffer_volume',
u'dna_extraction_protocols_digestion_id',
u'dna_extraction_protocols_lysis_buffer_id',
u'dna_extraction_protocols_lysis_buffer_volume',
u'dna_extraction_protocols_lysis_id',
u'dna_extraction_protocols_proteinase_id',
u'dna_extraction_protocols_proteinase_volume',
u'dna_extraction_protocols_inactivation_id',
u'dna_extraction_protocols_cooling_id',
u'dna_extraction_protocols_centrifugation_id',
u'dna_extraction_protocols_volume_unit_id', u'measurement_units_1_id',
u'measurement_units_1_code', u'measurement_units_1_long_name',
u'measurement_units_1_siunitx', u'solutions_1_id', u'solutions_1_code',
u'solutions_1_name', u'solutions_1_supplier',
u'solutions_1_supplier_id', u'incubations_1_id', u'incubations_1_speed',
u'incubations_1_duration', u'incubations_1_temperature',
u'incubations_1_movement', u'incubations_1_speed_unit_id',
u'incubations_1_duration_unit_id', u'incubations_1_temperature_unit_id',
u'solutions_2_id', u'solutions_2_code', u'solutions_2_name',
u'solutions_2_supplier', u'solutions_2_supplier_id',
u'incubations_2_id', u'incubations_2_speed', u'incubations_2_duration',
u'incubations_2_temperature', u'incubations_2_movement',
u'incubations_2_speed_unit_id', u'incubations_2_duration_unit_id',
u'incubations_2_temperature_unit_id', u'solutions_3_id',
u'solutions_3_code', u'solutions_3_name', u'solutions_3_supplier',
u'solutions_3_supplier_id', u'incubations_3_id', u'incubations_3_speed',
u'incubations_3_duration', u'incubations_3_temperature',
u'incubations_3_movement', u'incubations_3_speed_unit_id',
u'incubations_3_duration_unit_id', u'incubations_3_temperature_unit_id',
u'incubations_4_id', u'incubations_4_speed', u'incubations_4_duration',
u'incubations_4_temperature', u'incubations_4_movement',
u'incubations_4_speed_unit_id', u'incubations_4_duration_unit_id',
u'incubations_4_temperature_unit_id', u'incubations_5_id',
u'incubations_5_speed', u'incubations_5_duration',
u'incubations_5_temperature', u'incubations_5_movement',
u'incubations_5_speed_unit_id', u'incubations_5_duration_unit_id',
u'incubations_5_temperature_unit_id', u'measurement_units_2_id',
u'measurement_units_2_code', u'measurement_units_2_long_name',
u'measurement_units_2_siunitx', u'dna_extractions_1_id',
u'dna_extractions_1_code', u'dna_extractions_1_protocol_id',
u'dna_extractions_1_source_id'],
dtype='object')
This indeed contains all the columns I want - but the naming does not help me select what I want.
Isit possible to preserve the key names from the original table in this dataframe? e.g. instead of measurement_units_1_code
I would like to have mass_unit_code
.