'''
======================
Stochastic Translation
======================
'''
import copy
import numpy as np
import random
import logging as log
from arrow import StochasticSystem
from vivarium.core.process import Process
from vivarium.core.experiment import pp
from vivarium.data.amino_acids import amino_acids
from vivarium.data.molecular_weight import molecular_weight
from vivarium.library.datum import Datum
from vivarium.library.polymerize import Elongation, Polymerase, Template, build_stoichiometry, all_products, generate_template
from vivarium.library.dict_utils import deep_merge
[docs]class Ribosome(Polymerase):
pass
[docs]class Transcript(Template):
pass
[docs]def shuffle(l):
l = [item for item in l]
np.random.shuffle(l)
return l
[docs]def random_string(alphabet, length):
string = ''
for step in range(length):
string += random.choice(alphabet)
return string
#: Variable name for unbound ribosomes
UNBOUND_RIBOSOME_KEY = 'Ribosome'
monomer_symbols = []
monomer_ids = []
for symbol, id in amino_acids.items():
monomer_symbols.append(symbol)
monomer_ids.append(id)
A = random_string(monomer_symbols, 20)
Z = random_string(monomer_symbols, 60)
B = random_string(monomer_symbols, 30)
Y = random_string(monomer_symbols, 40)
[docs]def gather_genes(affinities):
genes = {}
for operon, product in affinities.keys():
if not operon in genes:
genes[operon] = []
genes[operon].append(product)
return genes
[docs]def transcripts_to_gene_counts(transcripts, operons):
counts = {}
for transcript, genes in operons.items():
for gene in genes:
counts[(transcript, gene)] = transcripts.get(transcript, 0)
return counts
[docs]class Translation(Process):
name = 'translation'
defaults = {
'sequences': {
('oA', 'eA'): A,
('oAZ', 'eA'): A,
('oAZ', 'eZ'): Z,
('oB', 'eB'): B,
('oBY', 'eB'): B,
('oBY', 'eY'): Y},
'templates': {
('oA', 'eA'): generate_template(('oA', 'eA'), 20, ['eA']),
('oAZ', 'eA'): generate_template(('oAZ', 'eA'), 20, ['eA']),
('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 60, ['eZ']),
('oB', 'eB'): generate_template(('oB', 'eB'), 30, ['eB']),
('oBY', 'eB'): generate_template(('oBY', 'eB'), 30, ['eB']),
('oBY', 'eY'): generate_template(('oBY', 'eY'), 40, ['eY'])},
'transcript_affinities': {
('oA', 'eA'): 1.0,
('oAZ', 'eA'): 2.0,
('oAZ', 'eZ'): 5.0,
('oB', 'eB'): 1.0,
('oBY', 'eB'): 2.0,
('oBY', 'eY'): 5.0},
'elongation_rate': 5.0,
'polymerase_occlusion': 10,
'symbol_to_monomer': amino_acids,
'monomer_ids': monomer_ids,
'concentration_keys': [],
'mass_deriver_key': 'mass_deriver',
'concentrations_deriver_key': 'translation_concentrations',
'time_step': 1.0,
}
def __init__(self, initial_parameters=None):
'''A stochastic translation model
.. WARNING::
Vivarium's knowledge base uses the gene name to name the
protein. This means that for a gene acrA that codes for
protein ArcA, you must refer to the gene, transcript, and
protein each as acrA.
.. DANGER::
This documentation will need to be updated to reflect the
changes in `#185
<https://github.com/CovertLab/vivarium/pull/185>`_
:term:`Ports`:
* **ribosomes**: Expects the ``ribosomes`` variable, whose
value is a list of the configurations of the ribosomes
currently active.
* **molecules**: Expects variables for each of the RNA
nucleotides.
* **transcripts**: Expects variables for each transcript to
translate. Translation will read transcripts from this port.
* **proteins**: Expects variables for each protein product. The
produced proteins will be added to this port as counts.
* **concentrations**: Expects variables for each key in
``concentration_keys``. This will be used by a :term:`deriver`
to convert counts to concentrations.
Arguments:
initial_parameters: A dictionary of configuration options.
Accepts the following keys:
* **sequences** (:py:class:`dict`): Maps from operon
name to the RNA sequence of the operon, as a
:py:class:`str`.
* **templates** (:py:class:`dict`): Maps from the name
of an transcript to a :term:`template specification`.
The template specification may be generated by
:py:func:`vivarium.library.polymerize.generate_template`
like so:
>>> from vivarium.library.polymerize import (
... generate_template)
>>> from vivarium.library.pretty import format_dict
>>> terminator_index = 5
>>> template = generate_template(
... 'oA', terminator_index, ['product1'])
>>> print(format_dict(template))
{
"direction": 1,
"id": "oA",
"position": 0,
"sites": [],
"terminators": [
{
"position": 5,
"products": [
"product1"
],
"strength": 1.0
}
]
}
* **transcript_affinities** (:py:class:`dict`): A map
from the name of a transcript to the binding affinity
(a :py:class:`float`) of the ribosome for the
transcript.
* **elongation_rate** (:py:class:`float`): The
elongation rate of the ribosome.
.. todo:: Units of elongation rate
* **polymerase_occlusion** (:py:class:`int`): The number
of base pairs behind the polymerase where another
polymerase is occluded and so cannot bind.
* **symbol_to_monomer** (:py:class:`dict`): Maps from
the symbols used to represent monomers in the RNA
sequence to the name of the free monomer. This should
generally be
:py:data:`vivarium.data.amino_acids.amino_acids`.
* **monomer_ids** (:py:class:`list`): A list of the
names of the free monomers consumed by translation.
This can generally be computed as:
>>> import pprint
>>>
>>> from vivarium.data.amino_acids import amino_acids
>>> monomer_ids = amino_acids.values()
>>> pp = pprint.PrettyPrinter()
>>> pp.pprint(list(monomer_ids))
['Alanine',
'Arginine',
'Asparagine',
'Aspartate',
'Cysteine',
'Glutamate',
'Glutamine',
'Glycine',
'Histidine',
'Isoleucine',
'Leucine',
'Lysine',
'Methionine',
'Phenylalanine',
'Proline',
'Serine',
'Threonine',
'Tryptophan',
'Tyrosine',
'Valine']
Note that we only included the `list()` transformation
to make the output prettier. The `dict_values` object
returned by `.values()` is sufficiently list-like for
use here. Also note that :py:mod:`pprint` just makes
the output prettier.
* **concentration_keys** (:py:class:`list`): A list of
variables you want to be able to access as
concentrations from the *concentrations* port. The
actual conversion is handled by a deriver.
Example configuring the process (uses
:py:func:vivarium.library.pretty.format_dict):
>>> from vivarium.library.pretty import format_dict
>>> from vivarium.data.amino_acids import amino_acids
>>> from vivarium.library.polymerize import generate_template
>>> random.seed(0) # Needed because process is stochastic
>>> np.random.seed(0)
>>> configurations = {
... 'sequences': {
... ('oA', 'eA'): 'AWDPT',
... ('oAZ', 'eZ'): 'YVEGELENGGMFISC',
... },
... 'templates': {
... ('oA', 'eA'): generate_template(('oA', 'eA'), 5, ['eA']),
... ('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 15, ['eA', 'eZ']),
... },
... 'transcript_affinities': {
... ('oA', 'eA'): 1.0,
... ('oAZ', 'eZ'): 1.0,
... },
... 'elongation_rate': 10.0,
... 'polymerase_occlusion': 10,
... 'symbol_to_monomer': amino_acids,
... 'monomer_ids': amino_acids.values(),
... 'concentration_keys': []
... }
>>> # make the translation process, and initialize the states
>>> translation = Translation(configurations) # doctest:+ELLIPSIS
>>> states = {
... 'ribosomes': {},
... 'molecules': {},
... 'proteins': {UNBOUND_RIBOSOME_KEY: 2},
... 'transcripts': {
... 'oA': 10,
... 'oAZ': 10,
... }
... }
>>> states['molecules'].update(
... {
... molecule_id: 100
... for molecule_id in translation.monomer_ids
... }
... )
>>> update = translation.next_update(1, states)
>>> print(update['ribosomes'])
{'_add': [{'path': (1,), 'state': <class 'vivarium.processes.translation.Ribosome'>: {'id': 1, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}, {'path': (2,), 'state': <class 'vivarium.processes.translation.Ribosome'>: {'id': 2, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}], '_delete': []}
'''
if not initial_parameters:
initial_parameters = {}
self.monomer_symbols = list(amino_acids.keys())
self.monomer_ids = list(amino_acids.values())
self.default_parameters = copy.deepcopy(self.defaults)
templates = self.or_default(initial_parameters, 'templates')
self.default_parameters['protein_ids'] = all_products({
key: Template(config)
for key, config in templates.items()})
self.default_parameters['transcript_order'] = list(
initial_parameters.get(
'transcript_affinities',
self.default_parameters['transcript_affinities']).keys())
self.default_parameters['molecule_ids'] = self.monomer_ids
self.parameters = copy.deepcopy(self.default_parameters)
self.parameters.update(initial_parameters)
self.sequences = self.parameters['sequences']
self.templates = self.parameters['templates']
self.transcript_affinities = self.parameters['transcript_affinities']
self.operons = gather_genes(self.transcript_affinities)
self.operon_order = list(self.operons.keys())
self.transcript_order = self.parameters['transcript_order']
self.transcript_count = len(self.transcript_order)
self.monomer_ids = self.parameters['monomer_ids']
self.molecule_ids = self.parameters['molecule_ids']
self.molecule_ids.extend(['ATP', 'ADP'])
self.protein_ids = self.parameters['protein_ids']
self.symbol_to_monomer = self.parameters['symbol_to_monomer']
self.elongation = 0
self.elongation_rate = self.parameters['elongation_rate']
self.polymerase_occlusion = self.parameters['polymerase_occlusion']
self.concentration_keys = self.parameters['concentration_keys']
self.affinity_vector = np.array([
self.transcript_affinities[transcript_key]
for transcript_key in self.transcript_order], dtype=np.float64)
self.stoichiometry = build_stoichiometry(self.transcript_count)
self.initiation = StochasticSystem(self.stoichiometry)
self.ribosome_id = 0
self.protein_keys = self.concentration_keys + self.protein_ids
self.all_protein_keys = self.protein_keys + [UNBOUND_RIBOSOME_KEY]
self.mass_deriver_key = self.or_default(initial_parameters, 'mass_deriver_key')
self.concentrations_deriver_key = self.or_default(
initial_parameters, 'concentrations_deriver_key')
log.info('translation parameters: {}'.format(self.parameters))
super(Translation, self).__init__(self.parameters)
[docs] def ports_schema(self):
def add_mass(schema, masses, key):
if '_properties' not in schema:
schema['_properties'] = {}
if key in masses:
schema['_properties']['mw'] = masses[key]
return schema
return {
'ribosomes': {
'*': {
'id': {
'_default': -1,
'_updater': 'set'},
'domain': {
'_default': 0,
'_updater': 'set'},
'state': {
'_default': None,
'_updater': 'set',
'_emit': True},
'position': {
'_default': 0,
'_updater': 'set',
'_emit': True},
'template': {
'_default': None,
'_updater': 'set',
'_emit': True},
'template_index': {
'_default': 0,
'_updater': 'set',
'_emit': True}}},
'global': {},
'molecules': {
molecule: add_mass({
'_emit': True,
'_default': 0,
'_divider': 'split'}, molecular_weight, molecule)
for molecule in self.molecule_ids},
'transcripts': {
transcript: add_mass({
'_default': 0,
'_divider': 'split'}, molecular_weight, transcript)
for transcript in list(self.operons.keys())},
'proteins': {
protein: add_mass({
'_default': 0,
'_divider': 'split',
'_emit': True}, molecular_weight, protein)
for protein in self.all_protein_keys},
'concentrations': {
molecule: {
'_default': 0.0,
'_updater': 'set'}
for molecule in self.protein_keys}}
[docs] def derivers(self):
return {
self.mass_deriver_key: {
'deriver': 'mass_deriver',
'port_mapping': {
'global': 'global'}},
self.concentrations_deriver_key: {
'deriver': 'concentrations_deriver',
'port_mapping': {
'global': 'global',
'counts': 'proteins',
'concentrations': 'concentrations'},
'config': {
'concentration_keys': self.protein_keys}}}
[docs] def next_update(self, timestep, states):
molecules = states['molecules']
transcripts = states['transcripts']
proteins = states['proteins']
ribosomes = {
id: Ribosome(ribosome)
for id, ribosome in states['ribosomes'].items()}
original_ribosome_keys = ribosomes.keys()
gene_counts = np.array(
list(transcripts_to_gene_counts(transcripts, self.operons).values()),
dtype=np.int64)
# Find out how many transcripts are currently blocked by a
# newly initiated ribosome
bound_transcripts = np.zeros(self.transcript_count, dtype=np.int64)
ribosomes_by_transcript = {
transcript_key: []
for transcript_key in self.transcript_order}
for ribosome in ribosomes.values():
ribosomes_by_transcript[ribosome.template].append(ribosome)
for index, transcript in enumerate(self.transcript_order):
bound_transcripts[index] = len([
ribosome
for ribosome in ribosomes_by_transcript[transcript]
if ribosome.is_bound()])
# Make the state for a gillespie simulation out of total number of each
# transcript not blocked by a bound ribosome, concatenated with the number
# of each transcript that is bound by a ribosome.
# These are the two states for each transcript the simulation
# will operate on, essentially going back and forth between
# bound and unbound states.
original_unbound_ribosomes = proteins[UNBOUND_RIBOSOME_KEY]
monomer_limits = {
monomer: molecules[monomer]
for monomer in self.monomer_ids}
unbound_ribosomes = original_unbound_ribosomes
templates = {
key: Template(template)
for key, template in self.templates.items()}
time = 0
now = 0
elongation = Elongation(
self.sequences,
templates,
monomer_limits,
self.symbol_to_monomer,
self.elongation)
while time < timestep:
# build the state vector for the gillespie simulation
substrate = np.concatenate([
gene_counts - bound_transcripts,
bound_transcripts,
[unbound_ribosomes]])
# find number of monomers until next terminator
distance = 1 / self.elongation_rate
# find interval of time that elongates to the point of the next terminator
interval = min(distance, timestep - time)
if interval == distance:
# perform the elongation until the next event
terminations, monomer_limits, ribosomes = elongation.step(
interval,
monomer_limits,
ribosomes)
unbound_ribosomes += terminations
else:
elongation.store_partial(interval)
terminations = 0
# run simulation for interval of time to next terminator
result = self.initiation.evolve(
interval,
substrate,
self.affinity_vector)
# go through each event in the simulation and update the state
ribosome_bindings = 0
for now, event in zip(result['time'], result['events']):
# ribosome has bound the transcript
transcript_key = self.transcript_order[event]
bound_transcripts[event] += 1
self.ribosome_id += 1
new_ribosome = Ribosome({
'id': self.ribosome_id,
'template': transcript_key,
'position': 0})
new_ribosome.bind()
new_ribosome.start_polymerizing()
ribosomes[new_ribosome.id] = new_ribosome
ribosome_bindings += 1
unbound_ribosomes -= 1
# deal with occluding rnap
for ribosome in ribosomes.values():
if ribosome.is_unoccluding(self.polymerase_occlusion):
bound_transcripts[ribosome.template_index] -= 1
ribosome.unocclude()
time += interval
# track how far elongation proceeded to start from next iteration
self.elongation = elongation.elongation - int(elongation.elongation)
proteins = {
UNBOUND_RIBOSOME_KEY: unbound_ribosomes - original_unbound_ribosomes}
proteins.update(elongation.complete_polymers)
molecules = {
key: count * -1
for key, count in elongation.monomers.items()}
original = set(original_ribosome_keys)
current = set(ribosomes.keys())
bound_ribosomes = current - original
completed_ribosomes = original - current
continuing_ribosomes = original - completed_ribosomes
# ATP hydrolysis cost is 2 per amino acid elongation
molecules['ATP'] = 0
molecules['ADP'] = 0
for count in elongation.monomers.values():
molecules['ATP'] -= 2 * count
molecules['ADP'] += 2 * count
ribosome_updates = {
id: ribosomes[id]
for id in continuing_ribosomes}
add_ribosomes = [
{'path': (bound,), 'state': ribosomes[bound]}
for bound in bound_ribosomes]
delete_ribosomes = [
(completed,)
for completed in completed_ribosomes]
ribosome_updates['_add'] = add_ribosomes
ribosome_updates['_delete'] = delete_ribosomes
update = {
'ribosomes': ribosome_updates,
'molecules': molecules,
'proteins': proteins}
return update
[docs]def test_translation():
parameters = {}
translation = Translation(parameters)
states = {
'ribosomes': {},
'molecules': {'ATP': 100000},
'proteins': {UNBOUND_RIBOSOME_KEY: 10},
'transcripts': {
'oA': 10,
'oAZ': 10,
'oB': 10,
'oBY': 10}}
states['molecules'].update({
molecule_id: 100
for molecule_id in translation.monomer_ids})
update = translation.next_update(10.0, states)
pp(update)
print('complete!')
if __name__ == '__main__':
test_translation()