OK, working in Biopython 1,79, I tried to load your pdb : 2r09 ,
borrowing from https://github.com/agpe/biopython-ligands
active_site.py
, modified into :
import Bio.PDB
import numpy
from Bio.PDB import PDBIO
cnt = 0
def residue_dist_to_ligand(protein_residue, ligand_residue) :
#Returns distance from the protein C-alpha to the closest ligand atom
dist = []
for atom in ligand_residue :
if "CA" in protein_residue:
vector = protein_residue["CA"].coord - atom.coord
dist.append(numpy.sqrt(numpy.sum(vector * vector)))
return min(dist)
def get_ligand_by_name(residue_name, model):
#Extract ligands from all chains in a model by its name
global ligands
ligands = {}
chains = model.child_dict
for c in chains:
ligands[c] = []
for protein_res in chains[c].child_list:
if protein_res.resname == residue_name:
ligands[c].append(protein_res)
print('ligands:', ligands)
return ligands
def get_ligand_by_chain(chain_name, model):
#Extract all ligand residues from given chain name
global ligands
ligands = {}
ligands[chain_name] = []
chains = model.child_dict
for protein_res in chains[chain_name].child_list:
ligands[chain_name].append(protein_res)
return ligands
def active_site(ligands, distance, model):
# Prints out residues located at a given distance from ligand
chains = model.child_dict
for group in ligands.values():
for ligand_res in group:
print("ligand residue: "+ligand_res.resname, ligand_res.id[1])
for c in chains:
for protein_res in chains[c].child_list:
if protein_res not in group:
dist = residue_dist_to_ligand(protein_res, ligand_res)
if dist and dist < distance :
print(protein_res.resname, protein_res.id[1], dist)
def save_ligand(structure, filename):
print('ligands.items() : ', ligands.items())
otto = [x[0].id[0] for x in ligands.values()]
print([x[0].id[0] for x in ligands.values()])
nove= [x[0] for x in ligands.keys()]
print([x[0] for x in ligands.keys()])
# Saves ligand to a filename.pdb
Select = Bio.PDB.Select
class LigandSelect(Select):
def accept_residue(self, residue):
if str(residue.id[0]) in otto and str(residue.get_parent().id) in nove:
global cnt
cnt += 1
print('ligands ñ#'+str(cnt)+' : ', str(residue.id[0]) ,' from chin : ', str(residue.get_parent().id))
return 1
else:
return 0
io=PDBIO()
io.set_structure(structure)
io.save(filename+'.pdb', LigandSelect() , preserve_atom_numbering=False)
after more googling realized that def save_ligand(structure, filename)
could be written in a shorter way:
def save_ligand(structure, filename):
# Saves ligand to a filename.pdb
Select = Bio.PDB.Select
class LigandSelect(Select):
def accept_residue(self, residue):
return residue in [item for sublist in ligands.values() for item in sublist]
io=PDBIO()
io.set_structure(structure)
io.save(filename+'.pdb', LigandSelect(), preserve_atom_numbering=False)
my code main.py
:
from Bio import PDB
from Bio.PDB import PDBIO
import active_site as SV
parser = PDB.PDBParser(PERMISSIVE=1, QUIET=1)
structure: PDB.Structure.Structure = parser.get_structure("prova", "2r09.pdb")
ligand = SV.get_ligand_by_name('4IP', structure[0])
print('ligand -> ', ligand, type(ligand))
SV.save_ligand(structure[0], 'ligand')
I can load your PDB files and save both the ligands present in the structure
(same ligand for two different chains: A,B ; don't know if protein is monomer or dimer (i.e. monomer but dimeric in AU, actual dimer))
my results as ligand.pdb
is:
HETATM 1 C1 4IP A 405 80.967 85.113 26.680 1.00 22.42 C
HETATM 2 O1 4IP A 405 82.327 85.039 27.129 1.00 23.40 O
HETATM 3 C2 4IP A 405 80.917 85.791 25.309 1.00 22.60 C
HETATM 4 O2 4IP A 405 81.463 87.121 25.385 1.00 20.92 O
HETATM 5 C3 4IP A 405 79.465 85.823 24.827 1.00 20.89 C
HETATM 6 O3 4IP A 405 79.392 86.282 23.475 1.00 20.17 O
HETATM 7 C4 4IP A 405 78.585 86.660 25.757 1.00 20.60 C
HETATM 8 O4 4IP A 405 77.224 86.534 25.358 1.00 20.32 O
HETATM 9 C5 4IP A 405 78.675 86.137 27.199 1.00 21.70 C
HETATM 10 O5 4IP A 405 77.999 87.108 28.007 1.00 20.84 O
HETATM 11 C6 4IP A 405 80.116 85.898 27.686 1.00 21.33 C
HETATM 12 O6 4IP A 405 80.130 85.209 28.962 1.00 21.36 O
HETATM 13 P1 4IP A 405 83.280 83.762 26.840 1.00 25.39 P
HETATM 14 O1P 4IP A 405 84.620 84.387 27.067 1.00 24.93 O
HETATM 15 O2P 4IP A 405 83.011 83.316 25.424 1.00 24.90 O
HETATM 16 O3P 4IP A 405 82.883 82.738 27.868 1.00 26.91 O
HETATM 17 P3 4IP A 405 78.855 85.297 22.324 1.00 20.21 P
HETATM 18 O4P 4IP A 405 77.428 84.990 22.747 1.00 20.15 O
HETATM 19 O5P 4IP A 405 79.754 84.074 22.330 1.00 21.26 O
HETATM 20 O6P 4IP A 405 78.942 86.136 21.074 1.00 19.72 O
HETATM 21 P4 4IP A 405 76.408 87.690 24.600 1.00 19.61 P
HETATM 22 O7P 4IP A 405 77.277 88.918 24.665 1.00 19.52 O
HETATM 23 O8P 4IP A 405 75.127 87.775 25.380 1.00 19.15 O
HETATM 24 O9P 4IP A 405 76.215 87.157 23.199 1.00 19.73 O
HETATM 25 P5 4IP A 405 77.266 86.724 29.383 1.00 21.18 P
HETATM 26 OPF 4IP A 405 76.447 85.480 29.103 1.00 19.82 O
HETATM 27 OPG 4IP A 405 76.396 87.922 29.662 1.00 21.85 O
HETATM 28 OPH 4IP A 405 78.385 86.527 30.363 1.00 20.70 O
TER 29 4IP A 405
HETATM 29 C1 4IP B 400 19.832 75.384 -3.106 1.00 26.91 C
HETATM 30 O1 4IP B 400 18.852 74.503 -3.643 1.00 28.03 O
HETATM 31 C2 4IP B 400 19.809 75.360 -1.573 1.00 25.72 C
HETATM 32 O2 4IP B 400 18.531 75.764 -1.075 1.00 26.15 O
HETATM 33 C3 4IP B 400 20.886 76.313 -1.046 1.00 25.51 C
HETATM 34 O3 4IP B 400 20.949 76.259 0.388 1.00 24.29 O
HETATM 35 C4 4IP B 400 20.660 77.747 -1.535 1.00 24.17 C
HETATM 36 O4 4IP B 400 21.786 78.546 -1.164 1.00 23.22 O
HETATM 37 C5 4IP B 400 20.563 77.819 -3.060 1.00 26.06 C
HETATM 38 O5 4IP B 400 20.179 79.156 -3.403 1.00 24.79 O
HETATM 39 C6 4IP B 400 19.613 76.789 -3.678 1.00 26.21 C
HETATM 40 O6 4IP B 400 19.845 76.713 -5.095 1.00 26.93 O
HETATM 41 P1 4IP B 400 19.148 72.969 -4.040 1.00 30.18 P
HETATM 42 O1P 4IP B 400 17.769 72.379 -3.959 1.00 29.62 O
HETATM 43 O2P 4IP B 400 19.703 72.997 -5.442 1.00 32.15 O
HETATM 44 O3P 4IP B 400 20.131 72.484 -3.004 1.00 30.58 O
HETATM 45 P3 4IP B 400 22.293 75.724 1.102 1.00 25.03 P
HETATM 46 O4P 4IP B 400 23.378 76.683 0.677 1.00 23.91 O
HETATM 47 O5P 4IP B 400 22.477 74.324 0.571 1.00 24.81 O
HETATM 48 O6P 4IP B 400 21.967 75.846 2.579 1.00 23.92 O
HETATM 49 P4 4IP B 400 21.769 79.638 0.027 1.00 24.76 P
HETATM 50 O7P 4IP B 400 20.352 79.738 0.497 1.00 22.44 O
HETATM 51 O8P 4IP B 400 22.374 80.892 -0.565 1.00 23.32 O
HETATM 52 O9P 4IP B 400 22.674 79.045 1.083 1.00 24.83 O
HETATM 53 P5 4IP B 400 20.543 79.836 -4.819 1.00 27.59 P
HETATM 54 OPF 4IP B 400 22.002 79.494 -5.029 1.00 26.64 O
HETATM 55 OPG 4IP B 400 20.334 81.296 -4.538 1.00 26.24 O
HETATM 56 OPH 4IP B 400 19.586 79.196 -5.799 1.00 25.90 O
TER 57 4IP B 400
END
the coordinates look similar to original pdb:
HETATM 5636 C1 4IP A 405 80.967 85.113 26.680 1.00 22.42 C
HETATM 5637 O1 4IP A 405 82.327 85.039 27.129 1.00 23.40 O
HETATM 5638 C2 4IP A 405 80.917 85.791 25.309 1.00 22.60 C
.....
HETATM 5706 C1 4IP B 400 19.832 75.384 -3.106 1.00 26.91 C
HETATM 5707 O1 4IP B 400 18.852 74.503 -3.643 1.00 28.03 O
HETATM 5708 C2 4IP B 400 19.809 75.360 -1.573 1.00 25.72 C
HETATM 5709 O2 4IP B 400 18.531 75.764 -1.075 1.00 26.15 O
check it out, to see if I didn't make any mistake. Can't you upgrade to 1.79 from 1.78 ?