Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 107 additions & 29 deletions src/chilife/MolSys.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
"resindices",
"segixs",
"segindices",
"entity_ids",
"entities",
"_Atoms",
"_Residues",
"atoms",
Expand Down Expand Up @@ -72,6 +74,7 @@
"occupancy": "occupancies",
"segix": "segixs",
"resix": "resixs",
"entity": "entity_ids",
}


Expand Down Expand Up @@ -397,6 +400,10 @@ def add_bonds(self, bonds, bond_type=None, bond_chiral=None, update_topology=Tru
new_bond_types.append(btype)
new_bond_chiral.append(chiral)

# If all bonds are accounted for we don't have to change anything
if len(new_bonds) == 0:
return

# Convert to numpy arrays
new_bonds = np.array(new_bonds)
new_bond_types = np.array(new_bond_types)
Expand Down Expand Up @@ -518,8 +525,35 @@ def write_cif(self, filename, use_bio_ccd: bool = True):
nonpoly_mon_id, nonpoly_icode, nonpoly_seq_num, nonpoly_sid = [], [], [], []

for i, seg in enumerate(self.segments):
segids.append(str(i + 1))
if len(seg.residues) > 1:
ix0 = int(np.min(seg.mask))
entity_eid = str(self.entity_ids[ix0])
segids.append(entity_eid)

# Determine if this segment is a polymer
if len(seg.residues) == 1:
is_polymer = False
# inter-residue bonds implies a polymer
elif seg.bonds is not None and ~np.all(
(
self.molsys.resindices[seg.bonds[:, 0]]
== self.molsys.resindices[seg.bonds[:, 1]]
)
):
is_polymer = True

# being a known amino acid or nucleotide implies a polymer
elif np.any(
np.isin(
seg.resnames,
tuple(chilife.nataa_codes) + tuple(chilife.natnu_codes),
)
):
is_polymer = True

else:
is_polymer = False

if is_polymer:
stype.append("polymer")
if any(x.resname in chilife.nataa_codes for x in seg.residues):
poly_type.append("polypeptide")
Expand All @@ -528,8 +562,8 @@ def write_cif(self, filename, use_bio_ccd: bool = True):
else:
poly_type.append("unknown")

poly_ids.append(str(i + 1))
strand_ids.append(str(seg.segid))
poly_ids.append(entity_eid)
strand_ids.append(str(seg.chain))

poly_seq_num.extend([str(x) for x in range(1, len(seg.residues) + 1)])
poly_seq_mon_id.extend([res.resname for res in seg.residues])
Expand All @@ -540,24 +574,23 @@ def write_cif(self, filename, use_bio_ccd: bool = True):
]
)
poly_seq_eids.extend([poly_ids[-1]] * len(seg.residues))
poly_asym.extend([str(res.segid) for res in seg.residues])
poly_asym.extend([str(res.chain) for res in seg.residues])
poly_auth_num.extend([str(res.resnum) for res in seg.residues])
poly_icodes.extend(
[res.icode if res.icode.strip() else "." for res in seg.residues]
)

else:
res = seg.residues[0]
nonpoly_eid.append(str(i + 1))
nonpoly_asym_id.append(str(seg.segid))
nonpoly_auth_seq.append(str(res.resnum))
nonpoly_mon_id.append(res.resname)
nonpoly_icode.append(res.icode if res.icode != "" else ".")
nonpoly_seq_num.append(str(1))
nonpoly_sid.append(str(seg.segid))
stype.append("non-polymer")

entites = self.segindices
for res in seg.residues:
nonpoly_eid.append(entity_eid)
nonpoly_asym_id.append(str(seg.segid))
nonpoly_auth_seq.append(str(res.resnum))
nonpoly_mon_id.append(res.resname)
nonpoly_icode.append(res.icode if res.icode != "" else ".")
nonpoly_seq_num.append(str(1))
nonpoly_sid.append(str(seg.chain))
stype.append("non-polymer")

atom_site_data = struct_to_cif_atom_site(self)

local_ccd = {}
Expand Down Expand Up @@ -687,7 +720,14 @@ def _make_sdf_data(self):

properties = {}

if len(chg_mask := np.argwhere(self.charges != 0).flatten().tolist()) > 0:
if (
len(
chg_mask := np.argwhere((self.charges != 0) * (~np.isnan(self.charges)))
.flatten()
.tolist()
)
> 0
):
properties["CHG"] = {
"atom_ids": chg_mask,
"charges": self.charges[chg_mask].tolist(),
Expand Down Expand Up @@ -780,6 +820,8 @@ class MolSys(MolecularSystemBase):
Array of the residue indices each atom belongs to. resindices are 0-indexed.
segindices: np.ndarray | None
Array of the segment index each atom belongs to. segindices are 0-indexed.
entity_ids : np.ndarray | None
Per-atom mmCIF ``label_entity_id`` strings. If omitted, inferred from ``segs`` (first-appearance order).
bonds : ArrayLike | None
Array of atom ID pairs corresponding to all atom bonds in the system.
bond_types : ArrayLike | None
Expand Down Expand Up @@ -817,6 +859,7 @@ def __init__(
bond_chiral: np.ndarray | None = None,
name: str = "Noname_MolSys",
use_ccd: dict | None = None,
entity_ids: np.ndarray | None = None,
):
# self.molsys = weakref.proxy(self)
self.record_types = record_types.copy()
Expand Down Expand Up @@ -872,6 +915,13 @@ def __init__(
self.segixs = segindices
self.segindices = self.segixs

if entity_ids is None:
self.entity_ids = np.array(self.segindices + 1, dtype=str)
else:
self.entity_ids = np.asarray(entity_ids, dtype=str).copy()

self.entity = self.entity_ids

self.is_protein = np.array(
[res in chilife.SUPPORTED_RESIDUES for res in resnames]
)
Expand Down Expand Up @@ -906,6 +956,8 @@ def __init__(
"q": self.occupancies,
"elem": self.atypes,
"element": self.atypes,
"entities": self.entity_ids,
"entity_ids": self.entity_ids,
"protein": self.is_protein,
"_len": self.n_atoms,
}
Expand Down Expand Up @@ -1081,7 +1133,12 @@ def from_cif(cls, file_name):
cif_data = cif_data[name]
atom_site_data = cif_data["atom_site"]

model_id = np.array([atom_site_data["pdbx_PDB_model_num"]]).flatten()
# Some CIFs don't use model numbers
if "pdbx_PDB_model_num" in atom_site_data:
model_id = np.array([atom_site_data["pdbx_PDB_model_num"]]).flatten()
else:
model_id = np.ones_like(atom_site_data["label_atom_id"])

model_ids = np.unique(model_id)
n_models = len(model_ids)
n_atoms = np.sum(model_id == model_ids[0])
Expand Down Expand Up @@ -1111,8 +1168,15 @@ def from_cif(cls, file_name):
icodes = np.array(atom_site_data["pdbx_PDB_ins_code"][:n_atoms])
icodes[icodes == "?"] = ""

segs = np.char.array(atom_site_data["label_asym_id"][:n_atoms]) + np.char.array(
atom_site_data["label_entity_id"][:n_atoms]
segs = np.char.array(atom_site_data["label_asym_id"][:n_atoms])

raw_eid = atom_site_data["label_entity_id"][:n_atoms]
parsed_eid = np.array(
[
"" if str(x).strip() in (".", "?", "") else str(x).strip()
for x in raw_eid
],
dtype=str,
)

occupancies = np.array(atom_site_data["occupancy"][:n_atoms], dtype=float)
Expand Down Expand Up @@ -1150,6 +1214,7 @@ def from_cif(cls, file_name):
charges,
name=name,
use_ccd=ccd_data,
entity_ids=parsed_eid,
)

# Parse inter-residue bonds from the _struct_conn loop (covale, disulf, metalc).
Expand Down Expand Up @@ -1566,6 +1631,8 @@ def copy(self):
bonds = None
bond_types = None

entity_ids = self.entity_ids if hasattr(self, "entity_ids") else None

return MolSys(
record_types=self.record_types,
atomids=self.atomids,
Expand All @@ -1584,6 +1651,7 @@ def copy(self):
bonds=bonds,
bond_types=bond_types,
name=self.names,
entity_ids=entity_ids,
)

def load_new(self, coordinates):
Expand All @@ -1600,9 +1668,10 @@ def load_new(self, coordinates):
@property
def _atoms(self):
if not hasattr(self, "_Atoms"):
self._Atoms = np.array(
[Atom(weakref.proxy(self), i) for i in range(self.n_atoms)]
)
proxy = weakref.proxy(self)
self._Atoms = np.empty(self.n_atoms, dtype=object)
for i in range(self.n_atoms):
self._Atoms[i] = Atom(proxy, i)
return self._Atoms

@property
Expand Down Expand Up @@ -2083,7 +2152,7 @@ def __len__(self):
return len(self.first_ix)

def __iter__(self):
for resix in self.resixs:
for resix in np.atleast_1d(self.resixs):
yield self.molsys._residues[resix]


Expand Down Expand Up @@ -2195,6 +2264,9 @@ def bonded_atoms(self):
all_idxs = np.array([idx for idx in all_idxs if idx != self.mask])
return AtomSelection(self.molsys, all_idxs)

def __len__(self):
return 1


class Residue(MolecularSystemBase):
"""
Expand Down Expand Up @@ -2309,13 +2381,13 @@ class Segment(MolecularSystemBase):
An array of atom indices that defines the atoms of the segment.
"""

def __init__(self, molsys, mask):
def __init__(self, molsys, mask, bond_mask=None):
segix = np.unique(molsys.segixs[mask])[0]
self.mask = np.where(molsys.segixs == segix)[0]
self.molsys = molsys
self.segindex = segix

self._update_bond_mask(mask)
self._update_bond_mask(bond_mask)


def byres(mask, molsys):
Expand Down Expand Up @@ -2443,6 +2515,7 @@ def concat_molsys(*systems):
bonds = []
bond_types = []
bond_chiral = []
entity_ids_parts = []
names = []
n_atoms = 0
n_residues = 0
Expand All @@ -2467,6 +2540,7 @@ def concat_molsys(*systems):
atypes.append(sys.atypes)
charges.append(sys.charges)
chiral.append(sys.chiral)
entity_ids_parts.append(sys.entity_ids)

if len(sys.trajectory) != traj_len:
raise AttributeError(
Expand All @@ -2481,7 +2555,7 @@ def concat_molsys(*systems):
bond_types = None
bond_chiral = None
elif sys.bonds is None:
bonds.append(np.array([[]]))
bonds.append(np.empty((0, 2)))
bond_types.append(np.array([]))
bond_chiral.append(np.array([]))
else:
Expand Down Expand Up @@ -2531,6 +2605,7 @@ def concat_molsys(*systems):
chiral = np.concatenate(chiral) if np.any([x is not None for x in chiral]) else None
resindices = np.concatenate(resindices)
segindices = np.concatenate(segindices)
entity_ids_cat = np.concatenate(entity_ids_parts)

if has_bonds:
bonds = np.concatenate(bonds)
Expand Down Expand Up @@ -2564,6 +2639,7 @@ def concat_molsys(*systems):
bond_types,
bond_chiral,
name,
entity_ids=entity_ids_cat,
)


Expand Down Expand Up @@ -2643,8 +2719,10 @@ def struct_to_cif_atom_site(struct):
[x if x.strip() else "." for x in struct.altlocs], n_frames
).tolist()
label_comp_id = np.tile(struct.resnames, n_frames).tolist()
label_asym_id = np.tile(struct.segids, n_frames).tolist()
label_entity_id = np.tile((struct.segindices + 1).astype(str), n_frames).tolist()
label_asym_id = np.tile(np.array(struct.segids, dtype=str), n_frames).tolist()
label_entity_id = np.tile(
np.asarray(struct.entity_ids, dtype=str), n_frames
).tolist()
label_seq_id = np.tile((struct.resindices + 1).astype(str), n_frames).tolist()
pdbx_PDB_ins_code = np.tile(
[x if x.strip() else "?" for x in struct.icodes], n_frames
Expand Down
2 changes: 0 additions & 2 deletions src/chilife/Topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,6 @@ def bonds_from_ccd_data(molsys, ccd_data):
a1, a2, btype, bchiral = POLYMER_LINKAGE_TYPES[link_type]
i1 = pres.ix[pres.names == a1].flat[0]
i2 = res.ix[res.names == a2].flat[0]
if i1 == 0 and i2 == 1255:
print("break")
bonds.append([i1, i2])
bond_types.append(btype)
bond_chiral.append(bchiral)
Expand Down
2 changes: 1 addition & 1 deletion src/chilife/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@
# SpinLabel = SpinLabel.SpinLabel
# dSpinLabel = dSpinLabel.dSpinLabel

__version__ = "1.2.2"
__version__ = "1.2.3"
Loading
Loading