Source code for mkado.core.codons

"""Genetic code and codon utilities."""

from __future__ import annotations

from typing import TYPE_CHECKING

from mkado.data.genetic_codes import (
    STANDARD_CODE,
    _build_code_table,
    _compute_codon_paths,
    get_codon_paths,
)

if TYPE_CHECKING:
    pass


class GeneticCode:
    """Represents a genetic code for translation and codon path computation.

    Attributes:
        code: Codon -> single-letter amino acid mapping backing this instance.
        _table_id: Table ID the instance was built from, or ``None`` when a
            custom ``code`` dict was supplied.
        _paths: Mapping keyed by ``(codon1, codon2)`` tuples whose values are
            mutational paths (see :meth:`get_path`).
        _translate_cache: Per-instance copy of ``code`` used for lookups.
        _syn_sites_cache: Precomputed synonymous-site count for every codon
            in ``code``.
    """

    def __init__(
        self,
        code: dict[str, str] | None = None,
        *,
        table_id: int | None = None,
    ):
        """Initialize with a codon-to-amino-acid mapping.

        Args:
            code: Dict mapping codons to single-letter amino acids. Must be a
                complete mapping of all 64 codons. Uses standard code if
                neither code nor table_id is provided.
            table_id: NCBI genetic code table ID (1-33). If provided,
                overrides code.
        """
        if table_id is not None:
            self.code = _build_code_table(table_id)
            self._table_id = table_id
        elif code is not None:
            self.code = code
            self._table_id = None
        else:
            self.code = STANDARD_CODE
            self._table_id = 1

        # Use cached paths when constructed by table_id; compute for custom dicts
        if self._table_id is not None:
            self._paths = get_codon_paths(self._table_id)
        else:
            self._paths = _compute_codon_paths(self.code)

        # Eager per-instance lookup tables over the 64 standard codons. Keyspace
        # is fixed by self.code; non-standard inputs (N, -, lowercase, "XYZ", ...)
        # fall through the .get() default at lookup time. Avoiding @lru_cache on
        # bound methods is deliberate — that pattern keeps `self` alive via the
        # cache key, leaking instances (regression test in tests/test_codons.py).
        # NOTE: _translate_cache must be assigned before _syn_sites_cache,
        # because _compute_syn_sites reads it.
        self._translate_cache: dict[str, str] = dict(self.code)
        self._syn_sites_cache: dict[str, float] = {
            codon: self._compute_syn_sites(codon) for codon in self.code
        }

    def translate(self, codon: str) -> str:
        """Translate a codon to its amino acid.

        The lookup is case-insensitive (the codon is upper-cased first).

        Args:
            codon: Three-letter codon string

        Returns:
            Single-letter amino acid code, or 'X' for unknown
        """
        return self._translate_cache.get(codon.upper(), "X")

    def _compute_syn_sites(self, codon: str) -> float:
        """Nei-Gojobori synonymous-site count for one codon.

        Used during ``__init__`` to populate ``_syn_sites_cache`` and not
        called afterwards. Reads ``_translate_cache``, which must already be
        built when this is invoked.

        Args:
            codon: Upper-case three-letter codon (no case folding is done
                here).

        Returns:
            Sum over the three positions of the fraction of single-nucleotide
            substitutions that keep the amino acid unchanged. Substitutions
            that produce a stop codon are left out of both numerator and
            denominator. Stop codons and unknown codons score ``0.0``.
        """
        aa = self._translate_cache.get(codon, "X")
        if aa in ("*", "X"):
            return 0.0

        syn_sites = 0.0
        for pos in range(3):
            syn_count = 0
            total_count = 0
            for nt in ("A", "C", "G", "T"):
                if nt == codon[pos]:
                    continue
                new_codon = codon[:pos] + nt + codon[pos + 1 :]
                new_aa = self._translate_cache.get(new_codon, "X")
                # Changes to a stop codon are not counted as possible changes.
                if new_aa != "*":
                    total_count += 1
                    if new_aa == aa:
                        syn_count += 1
            # Guard against a position where every substitution yields a stop.
            if total_count > 0:
                syn_sites += syn_count / total_count
        return syn_sites

    def translate_sequence(self, sequence: str, reading_frame: int = 1) -> str:
        """Translate a nucleotide sequence to amino acids.

        Trailing nucleotides that do not fill a complete codon are ignored.

        Args:
            sequence: Nucleotide sequence string
            reading_frame: Reading frame (1, 2, or 3)

        Returns:
            Amino acid sequence string

        Raises:
            ValueError: If ``reading_frame`` is not 1, 2, or 3. A frame below 1
                would otherwise give a negative start index and silently
                produce spurious residues.
        """
        if reading_frame not in (1, 2, 3):
            raise ValueError(
                f"reading_frame must be 1, 2, or 3, got {reading_frame!r}"
            )
        start = reading_frame - 1
        # Stop at len - 2 so only complete three-letter codons are translated.
        return "".join(
            self.translate(sequence[i : i + 3])
            for i in range(start, len(sequence) - 2, 3)
        )

    def get_path(self, codon1: str, codon2: str) -> list[tuple[str, int]]:
        """Get the shortest mutational path between two codons.

        Both codons are upper-cased before lookup. When a path is found, the
        list is returned as stored in the path table (not copied), so callers
        should treat it as read-only.

        Args:
            codon1: Starting codon
            codon2: Ending codon

        Returns:
            List of (change_type, position) tuples where change_type is 'R'
            for replacement (non-synonymous) or 'S' for synonymous, and
            position is the codon position (0, 1, or 2). An empty list is
            returned when the pair is not present in the path table.
        """
        codon1 = codon1.upper()
        codon2 = codon2.upper()
        return self._paths.get((codon1, codon2), [])

    def count_synonymous_sites(self, codon: str) -> float:
        """Count the number of synonymous sites in a codon.

        Uses Nei-Gojobori method: for each site, calculate fraction of
        possible changes that are synonymous. Values are precomputed in
        ``__init__``; this method is a case-insensitive table lookup.

        Args:
            codon: Three-letter codon string

        Returns:
            Number of synonymous sites (0-3). Codons containing 'N' or '-',
            and codons absent from the table, yield ``0.0``.
        """
        codon = codon.upper()
        if "N" in codon or "-" in codon:
            return 0.0
        return self._syn_sites_cache.get(codon, 0.0)

    def is_synonymous_change(self, codon1: str, codon2: str) -> bool | None:
        """Check if a single-nucleotide change is synonymous.

        Args:
            codon1: Original codon
            codon2: Changed codon

        Returns:
            True if synonymous, False if non-synonymous, None if not a
            single-nucleotide change or invalid codons (wrong length or
            not translatable).
        """
        codon1 = codon1.upper()
        codon2 = codon2.upper()

        # Codons of the wrong length are invalid; checking first also keeps
        # the positional comparison below from raising IndexError.
        if len(codon1) != 3 or len(codon2) != 3:
            return None

        # Check single nucleotide difference
        diffs = sum(1 for a, b in zip(codon1, codon2) if a != b)
        if diffs != 1:
            return None

        aa1 = self.translate(codon1)
        aa2 = self.translate(codon2)

        if aa1 == "X" or aa2 == "X":
            return None

        return aa1 == aa2
# Default genetic code instance DEFAULT_CODE = GeneticCode()