StollLab · mtessmer · May 5, 2026 · May 1, 2026 · May 2, 2026 · May 2, 2026
diff --git a/src/chilife/MolSys.py b/src/chilife/MolSys.py
@@ -45,6 +45,8 @@
     "resindices",
     "segixs",
     "segindices",
+    "entity_ids",
+    "entities",
     "_Atoms",
     "_Residues",
     "atoms",
@@ -72,6 +74,7 @@
     "occupancy": "occupancies",
     "segix": "segixs",
     "resix": "resixs",
+    "entity": "entity_ids",
 }
 
 
@@ -397,6 +400,10 @@ def add_bonds(self, bonds, bond_type=None, bond_chiral=None, update_topology=Tru
                 new_bond_types.append(btype)
                 new_bond_chiral.append(chiral)
 
+        # If all bonds are accounted for we don't have to change anything
+        if len(new_bonds) == 0:
+            return
+
         # Convert to numpy arrays
         new_bonds = np.array(new_bonds)
         new_bond_types = np.array(new_bond_types)
@@ -518,8 +525,35 @@ def write_cif(self, filename, use_bio_ccd: bool = True):
         nonpoly_mon_id, nonpoly_icode, nonpoly_seq_num, nonpoly_sid = [], [], [], []
 
         for i, seg in enumerate(self.segments):
-            segids.append(str(i + 1))
-            if len(seg.residues) > 1:
+            ix0 = int(np.min(seg.mask))
+            entity_eid = str(self.entity_ids[ix0])
+            segids.append(entity_eid)
+
+            # Determine if this segment is a polymer
+            if len(seg.residues) == 1:
+                is_polymer = False
+            # inter-residue bonds implies a polymer
+            elif seg.bonds is not None and ~np.all(
+                (
+                    self.molsys.resindices[seg.bonds[:, 0]]
+                    == self.molsys.resindices[seg.bonds[:, 1]]
+                )
+            ):
+                is_polymer = True
+
+            # being a known amino acid or nucleotide implies a polymer
+            elif np.any(
+                np.isin(
+                    seg.resnames,
+                    tuple(chilife.nataa_codes) + tuple(chilife.natnu_codes),
+                )
+            ):
+                is_polymer = True
+
+            else:
+                is_polymer = False
+
+            if is_polymer:
                 stype.append("polymer")
                 if any(x.resname in chilife.nataa_codes for x in seg.residues):
                     poly_type.append("polypeptide")
@@ -528,8 +562,8 @@ def write_cif(self, filename, use_bio_ccd: bool = True):
                 else:
                     poly_type.append("unknown")
 
-                poly_ids.append(str(i + 1))
-                strand_ids.append(str(seg.segid))
+                poly_ids.append(entity_eid)
+                strand_ids.append(str(seg.chain))
 
                 poly_seq_num.extend([str(x) for x in range(1, len(seg.residues) + 1)])
                 poly_seq_mon_id.extend([res.resname for res in seg.residues])
@@ -540,24 +574,23 @@ def write_cif(self, filename, use_bio_ccd: bool = True):
                     ]
                 )
                 poly_seq_eids.extend([poly_ids[-1]] * len(seg.residues))
-                poly_asym.extend([str(res.segid) for res in seg.residues])
+                poly_asym.extend([str(res.chain) for res in seg.residues])
                 poly_auth_num.extend([str(res.resnum) for res in seg.residues])
                 poly_icodes.extend(
                     [res.icode if res.icode.strip() else "." for res in seg.residues]
                 )
 
             else:
-                res = seg.residues[0]
-                nonpoly_eid.append(str(i + 1))
-                nonpoly_asym_id.append(str(seg.segid))
-                nonpoly_auth_seq.append(str(res.resnum))
-                nonpoly_mon_id.append(res.resname)
-                nonpoly_icode.append(res.icode if res.icode != "" else ".")
-                nonpoly_seq_num.append(str(1))
-                nonpoly_sid.append(str(seg.segid))
-                stype.append("non-polymer")
-
-        entites = self.segindices
+                for res in seg.residues:
+                    nonpoly_eid.append(entity_eid)
+                    nonpoly_asym_id.append(str(seg.segid))
+                    nonpoly_auth_seq.append(str(res.resnum))
+                    nonpoly_mon_id.append(res.resname)
+                    nonpoly_icode.append(res.icode if res.icode != "" else ".")
+                    nonpoly_seq_num.append(str(1))
+                    nonpoly_sid.append(str(seg.chain))
+                    stype.append("non-polymer")
+
         atom_site_data = struct_to_cif_atom_site(self)
 
         local_ccd = {}
@@ -687,7 +720,14 @@ def _make_sdf_data(self):
 
         properties = {}
 
-        if len(chg_mask := np.argwhere(self.charges != 0).flatten().tolist()) > 0:
+        if (
+            len(
+                chg_mask := np.argwhere((self.charges != 0) * (~np.isnan(self.charges)))
+                .flatten()
+                .tolist()
+            )
+            > 0
+        ):
             properties["CHG"] = {
                 "atom_ids": chg_mask,
                 "charges": self.charges[chg_mask].tolist(),
@@ -780,6 +820,8 @@ class MolSys(MolecularSystemBase):
         Array of the residue indices each atom belongs to. resindices are 0-indexed.
     segindices: np.ndarray | None
         Array of the segment index each atom belongs to. segindices are 0-indexed.
+    entity_ids : np.ndarray | None
+        Per-atom mmCIF ``label_entity_id`` strings. If omitted, inferred from ``segs`` (first-appearance order).
     bonds : ArrayLike | None
         Array of atom ID pairs corresponding to all atom bonds in the system.
     bond_types : ArrayLike | None
@@ -817,6 +859,7 @@ def __init__(
         bond_chiral: np.ndarray | None = None,
         name: str = "Noname_MolSys",
         use_ccd: dict | None = None,
+        entity_ids: np.ndarray | None = None,
     ):
         # self.molsys = weakref.proxy(self)
         self.record_types = record_types.copy()
@@ -872,6 +915,13 @@ def __init__(
             self.segixs = segindices
         self.segindices = self.segixs
 
+        if entity_ids is None:
+            self.entity_ids = np.array(self.segindices + 1, dtype=str)
+        else:
+            self.entity_ids = np.asarray(entity_ids, dtype=str).copy()
+
+        self.entity = self.entity_ids
+
         self.is_protein = np.array(
             [res in chilife.SUPPORTED_RESIDUES for res in resnames]
         )
@@ -906,6 +956,8 @@ def __init__(
             "q": self.occupancies,
             "elem": self.atypes,
             "element": self.atypes,
+            "entities": self.entity_ids,
+            "entity_ids": self.entity_ids,
             "protein": self.is_protein,
             "_len": self.n_atoms,
         }
@@ -1081,7 +1133,12 @@ def from_cif(cls, file_name):
         cif_data = cif_data[name]
         atom_site_data = cif_data["atom_site"]
 
-        model_id = np.array([atom_site_data["pdbx_PDB_model_num"]]).flatten()
+        # Some CIFs don't use model numbers
+        if "pdbx_PDB_model_num" in atom_site_data:
+            model_id = np.array([atom_site_data["pdbx_PDB_model_num"]]).flatten()
+        else:
+            model_id = np.ones_like(atom_site_data["label_atom_id"])
+
         model_ids = np.unique(model_id)
         n_models = len(model_ids)
         n_atoms = np.sum(model_id == model_ids[0])
@@ -1111,8 +1168,15 @@ def from_cif(cls, file_name):
         icodes = np.array(atom_site_data["pdbx_PDB_ins_code"][:n_atoms])
         icodes[icodes == "?"] = ""
 
-        segs = np.char.array(atom_site_data["label_asym_id"][:n_atoms]) + np.char.array(
-            atom_site_data["label_entity_id"][:n_atoms]
+        segs = np.char.array(atom_site_data["label_asym_id"][:n_atoms])
+
+        raw_eid = atom_site_data["label_entity_id"][:n_atoms]
+        parsed_eid = np.array(
+            [
+                "" if str(x).strip() in (".", "?", "") else str(x).strip()
+                for x in raw_eid
+            ],
+            dtype=str,
         )
 
         occupancies = np.array(atom_site_data["occupancy"][:n_atoms], dtype=float)
@@ -1150,6 +1214,7 @@ def from_cif(cls, file_name):
             charges,
             name=name,
             use_ccd=ccd_data,
+            entity_ids=parsed_eid,
         )
 
         # Parse inter-residue bonds from the _struct_conn loop (covale, disulf, metalc).
@@ -1566,6 +1631,8 @@ def copy(self):
             bonds = None
             bond_types = None
 
+        entity_ids = self.entity_ids if hasattr(self, "entity_ids") else None
+
         return MolSys(
             record_types=self.record_types,
             atomids=self.atomids,
@@ -1584,6 +1651,7 @@ def copy(self):
             bonds=bonds,
             bond_types=bond_types,
             name=self.names,
+            entity_ids=entity_ids,
         )
 
     def load_new(self, coordinates):
@@ -1600,9 +1668,10 @@ def load_new(self, coordinates):
     @property
     def _atoms(self):
         if not hasattr(self, "_Atoms"):
-            self._Atoms = np.array(
-                [Atom(weakref.proxy(self), i) for i in range(self.n_atoms)]
-            )
+            proxy = weakref.proxy(self)
+            self._Atoms = np.empty(self.n_atoms, dtype=object)
+            for i in range(self.n_atoms):
+                self._Atoms[i] = Atom(proxy, i)
         return self._Atoms
 
     @property
@@ -2083,7 +2152,7 @@ def __len__(self):
         return len(self.first_ix)
 
     def __iter__(self):
-        for resix in self.resixs:
+        for resix in np.atleast_1d(self.resixs):
             yield self.molsys._residues[resix]
 
 
@@ -2195,6 +2264,9 @@ def bonded_atoms(self):
         all_idxs = np.array([idx for idx in all_idxs if idx != self.mask])
         return AtomSelection(self.molsys, all_idxs)
 
+    def __len__(self):
+        return 1
+
 
 class Residue(MolecularSystemBase):
     """
@@ -2309,13 +2381,13 @@ class Segment(MolecularSystemBase):
         An array of atom indices that defines the atoms of the segment.
     """
 
-    def __init__(self, molsys, mask):
+    def __init__(self, molsys, mask, bond_mask=None):
         segix = np.unique(molsys.segixs[mask])[0]
         self.mask = np.where(molsys.segixs == segix)[0]
         self.molsys = molsys
         self.segindex = segix
 
-        self._update_bond_mask(mask)
+        self._update_bond_mask(bond_mask)
 
 
 def byres(mask, molsys):
@@ -2443,6 +2515,7 @@ def concat_molsys(*systems):
     bonds = []
     bond_types = []
     bond_chiral = []
+    entity_ids_parts = []
     names = []
     n_atoms = 0
     n_residues = 0
@@ -2467,6 +2540,7 @@ def concat_molsys(*systems):
         atypes.append(sys.atypes)
         charges.append(sys.charges)
         chiral.append(sys.chiral)
+        entity_ids_parts.append(sys.entity_ids)
 
         if len(sys.trajectory) != traj_len:
             raise AttributeError(
@@ -2481,7 +2555,7 @@ def concat_molsys(*systems):
             bond_types = None
             bond_chiral = None
         elif sys.bonds is None:
-            bonds.append(np.array([[]]))
+            bonds.append(np.empty((0, 2)))
             bond_types.append(np.array([]))
             bond_chiral.append(np.array([]))
         else:
@@ -2531,6 +2605,7 @@ def concat_molsys(*systems):
     chiral = np.concatenate(chiral) if np.any([x is not None for x in chiral]) else None
     resindices = np.concatenate(resindices)
     segindices = np.concatenate(segindices)
+    entity_ids_cat = np.concatenate(entity_ids_parts)
 
     if has_bonds:
         bonds = np.concatenate(bonds)
@@ -2564,6 +2639,7 @@ def concat_molsys(*systems):
         bond_types,
         bond_chiral,
         name,
+        entity_ids=entity_ids_cat,
     )
 
 
@@ -2643,8 +2719,10 @@ def struct_to_cif_atom_site(struct):
         [x if x.strip() else "." for x in struct.altlocs], n_frames
     ).tolist()
     label_comp_id = np.tile(struct.resnames, n_frames).tolist()
-    label_asym_id = np.tile(struct.segids, n_frames).tolist()
-    label_entity_id = np.tile((struct.segindices + 1).astype(str), n_frames).tolist()
+    label_asym_id = np.tile(np.array(struct.segids, dtype=str), n_frames).tolist()
+    label_entity_id = np.tile(
+        np.asarray(struct.entity_ids, dtype=str), n_frames
+    ).tolist()
     label_seq_id = np.tile((struct.resindices + 1).astype(str), n_frames).tolist()
     pdbx_PDB_ins_code = np.tile(
         [x if x.strip() else "?" for x in struct.icodes], n_frames

diff --git a/src/chilife/Topology.py b/src/chilife/Topology.py
@@ -435,8 +435,6 @@ def bonds_from_ccd_data(molsys, ccd_data):
                     a1, a2, btype, bchiral = POLYMER_LINKAGE_TYPES[link_type]
                     i1 = pres.ix[pres.names == a1].flat[0]
                     i2 = res.ix[res.names == a2].flat[0]
-                    if i1 == 0 and i2 == 1255:
-                        print("break")
                     bonds.append([i1, i2])
                     bond_types.append(btype)
                     bond_chiral.append(bchiral)

diff --git a/src/chilife/__init__.py b/src/chilife/__init__.py
@@ -44,4 +44,4 @@
 # SpinLabel = SpinLabel.SpinLabel
 # dSpinLabel = dSpinLabel.dSpinLabel
 
-__version__ = "1.2.2"
+__version__ = "1.2.3"