'''
========================
Stochastic Transcription
========================
'''
import os
import copy
import numpy as np
import logging as log
from arrow import StochasticSystem
from vivarium.library.units import units
from vivarium.library.dict_utils import deep_merge, keys_list
from vivarium.core.experiment import pp, pf
from vivarium.core.process import Process
from vivarium.core.composition import process_in_experiment
from vivarium.states.chromosome import Chromosome, Rnap, Promoter, frequencies, add_merge, toy_chromosome_config
from vivarium.library.polymerize import Elongation, build_stoichiometry, template_products
from vivarium.data.nucleotides import nucleotides
[docs]def choose_element(elements):
if elements:
choice = np.random.choice(len(elements), 1)
return list(elements)[int(choice)]
#: Variable name for unbound RNA polymerase
UNBOUND_RNAP_KEY = 'RNA Polymerase'
monomer_ids = list(nucleotides.values())
#: The default configuration parameters for :py:class:`Transcription`
[docs]class Transcription(Process):
name = 'transcription'
defaults = {
'promoter_affinities': {},
'transcription_factors': [],
'sequence': '',
'templates': {},
'genes': {},
'elongation_rate': 1.0,
'polymerase_occlusion': 5,
'symbol_to_monomer': nucleotides,
'monomer_ids': monomer_ids,
'concentrations_deriver_key': 'transcription_concentrations',
'initial_domains': {
0: {
'id': 0,
'lead': 0,
'lag': 0,
'children': []}},
'molecule_ids': monomer_ids,
'time_step': 1.0,
}
def __init__(self, initial_parameters=None):
'''A stochastic transcription model
.. WARNING:: Vivarium's knowledge base uses the gene name to
name the protein. This means that for a gene acrA that
codes for protein AcrA, you must refer to the gene,
transcript, and protein each as acrA.
:term:`Ports`:
* **chromosome**: The linked :term:`store` should hold a
representation of the chromosome in the form returned by
:py:meth:`vivarium.states.chromosome.Chromosome.to_dict`.
* **molecules**: Expects variables with the names in the
*molecule_ids* configuration. These are the monomers consumed
by transcription.
* **factors**: Expects variables for each transcription factor's
concentration.
* **transcripts**: The linked store should store the
concentrations of the transcripts.
* **proteins**: The linked store should hold the concentrations
of the transcription factors and the RNA polymerase.
Arguments:
initial_parameters: The following configuration options may
be provided:
* **promoter_affinities** (:py:class:`dict`): Maps from
binding state tuples to the binding affinity of RNA
polymerase and the promoter when the promoter is at
that binding state. The binding state of a promoter is
which (if any) transcription factors are bound to the
promoter. Such a binding state can be represented by a
binding state tuple, which is a :py:class:`tuple`
whose first element is the name of the promoter. All
bound transcription factors are listed as subsequent
elements. If no transcription factors are bound, the
sole subsequent element is ``None``.
.. todo:: What is the significance of the order in the
binding state tuple?
.. todo:: What are the units of the affinities?
* **transcription_factors** (:py:class:`list`): A list
of all modeled transcription factors.
* **sequence**: The DNA sequence that includes all the
genes whose transcription is being modeled.
* **templates** (:py:class:`dict`): Maps from the name
of an operon to that operon's :term:`template
specification`.
* **genes** (:py:class:`dict`): Maps from operon name to
a list of the names of the genes in that operon.
* **elongation_rate** (:py:class:`float`): The
elongation rate of the RNA polymerase.
* **polymerase_occlusion** (:py:class:`int`): The number
of base pairs behind the polymerase where another
polymerase is occluded and so cannot bind.
* **symbol_to_monomer** (:py:class:`dict`): Maps from
the symbols used to represent monomers in the RNA
sequence to the name of the free monomer. This should
generally be
:py:data:`vivarium.data.nucleotides.nucleotides`.
* **monomer_ids** (:py:class:`list`): A list of the
names of the free monomers consumed by transcription.
This can generally be computed as:
>>> from vivarium.data.nucleotides import nucleotides
>>> monomer_ids = nucleotides.values()
>>> print(list(monomer_ids))
['ATP', 'GTP', 'UTP', 'CTP']
Note that we only included the ``list()``
transformation to make the output prettier. The
``dict_values`` object returned by the ``.values()``
call is sufficiently list-like for use here.
* **molecule_ids** (:py:class:`list`): A list of all the
molecules needed by the :term:`process`. This will
generally be the same as *monomer_ids*.
Example configuring the process (uses
:py:func:`vivarium.library.pretty.format_dict`):
>>> import random
>>>
>>> import numpy as np
>>>
>>> from vivarium.states.chromosome import (
... toy_chromosome_config,
... Chromosome,
... )
>>> from vivarium.data.nucleotides import nucleotides
>>> # format_dict lets us print dictionaries prettily
>>> from vivarium.library.pretty import format_dict
>>>
>>> random.seed(0) # Needed because process is stochastic
>>> np.random.seed(0)
>>> # We will use the toy chromosome from toy_chromosome_config
>>> print(toy_chromosome_config)
{'sequence': 'ATACGGCACGTGACCGTCAACTTA', 'genes': {'oA': ['eA'], 'oAZ': ['eA', 'eZ'], 'oB': ['eB'], 'oBY': ['eB', 'eY']}, 'promoter_order': ['pA', 'pB'], 'promoters': {'pA': {'id': 'pA', 'position': 3, 'direction': 1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfA': <Quantity(0.3, 'millimolar')>}}], 'terminators': [{'position': 6, 'strength': 0.5, 'products': ['oA']}, {'position': 12, 'strength': 1.0, 'products': ['oAZ']}]}, 'pB': {'id': 'pB', 'position': -3, 'direction': -1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfB': <Quantity(0.5, 'millimolar')>}}], 'terminators': [{'position': -9, 'strength': 0.5, 'products': ['oB']}, {'position': -12, 'strength': 1.0, 'products': ['oBY']}]}}, 'promoter_affinities': {('pA', None): 1.0, ('pA', 'tfA'): 10.0, ('pB', None): 1.0, ('pB', 'tfB'): 10.0}, 'domains': {0: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'rnaps': {}}
>>> monomer_ids = list(nucleotides.values())
>>> configuration = {
... 'promoter_affinities': {
... ('pA', None): 1.0,
... ('pA', 'tfA'): 10.0,
... ('pB', None): 1.0,
... ('pB', 'tfB'): 10.0},
... 'transcription_factors': ['tfA', 'tfB'],
... 'sequence': toy_chromosome_config['sequence'],
... 'templates': toy_chromosome_config['promoters'],
... 'genes': toy_chromosome_config['genes'],
... 'elongation_rate': 10.0,
... 'polymerase_occlusion': 5,
... 'symbol_to_monomer': nucleotides,
... 'monomer_ids': monomer_ids,
... 'molecule_ids': monomer_ids,
... }
>>> # At this point we haven't used the toy chromosome yet
>>> # because it will be specified in the chromosome port.
>>> # Notice that the parameters are specific to the chromosome.
>>> transcription_process = Transcription(configuration)
>>> # Now we need to initialize the simulation stores
>>> state = {
... 'chromosome': toy_chromosome_config,
... 'molecules': {
... nucleotide: 10
... for nucleotide in monomer_ids
... },
... 'proteins': {UNBOUND_RNAP_KEY: 10},
... 'factors': {'tfA': 0.2 * units.mM, 'tfB': 0.7 * units.mM},
... }
>>> update = transcription_process.next_update(1.0, state)
>>> print(update['chromosome'])
{'rnaps': {'_add': [{'path': (2,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 2, 'template': 'pA', 'template_index': 0, 'terminator': 1, 'domain': 0, 'state': 'polymerizing', 'position': 7}}, {'path': (3,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 3, 'template': 'pB', 'template_index': 1, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 3}}, {'path': (4,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 4, 'template': 'pA', 'template_index': 0, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 0}}], '_delete': []}, 'rnap_id': 4, 'domains': {0: <class 'vivarium.states.chromosome.Domain'>: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'root_domain': 0}
'''
if not initial_parameters:
initial_parameters = {}
log.debug('inital transcription parameters: {}'.format(initial_parameters))
super(Transcription, self).__init__(initial_parameters)
self.derive_defaults('templates', 'promoter_order', keys_list)
self.derive_defaults('templates', 'transcript_ids', template_products)
self.sequence = self.parameters['sequence']
self.templates = self.parameters['templates']
self.genes = self.parameters['genes']
empty_chromosome = Chromosome({
'sequence': self.sequence,
'promoters': self.templates,
'genes': self.genes})
self.sequences = empty_chromosome.sequences()
self.symbol_to_monomer = self.parameters['symbol_to_monomer']
log.debug('chromosome sequence: {}'.format(self.sequence))
self.promoter_affinities = self.parameters['promoter_affinities']
self.promoter_order = self.parameters['promoter_order']
self.promoter_count = len(self.promoter_order)
self.transcription_factors = self.parameters['transcription_factors']
self.molecule_ids = self.parameters['molecule_ids']
self.molecule_ids.extend(['ATP', 'ADP'])
self.monomer_ids = self.parameters['monomer_ids']
self.transcript_ids = self.parameters['transcript_ids']
self.elongation = 0
self.elongation_rate = self.parameters['elongation_rate']
self.polymerase_occlusion = self.parameters['polymerase_occlusion']
self.stoichiometry = build_stoichiometry(self.promoter_count)
self.initiation = StochasticSystem(self.stoichiometry, random_seed=np.random.randint(2**31))
self.protein_ids = [UNBOUND_RNAP_KEY] + self.transcription_factors
self.initial_domains = self.parameters['initial_domains']
self.concentrations_deriver_key = self.parameters['concentrations_deriver_key']
self.chromosome_ports = ['rnaps', 'rnap_id', 'domains', 'root_domain']
log.debug('final transcription parameters: {}'.format(self.parameters))
[docs] def build_affinity_vector(self, promoters, factors):
vector = np.zeros(len(self.promoter_order), dtype=np.float64)
for index, promoter_key in enumerate(self.promoter_order):
promoter = promoters[promoter_key]
binding = promoter.binding_state(factors)
affinity = self.promoter_affinities.get(binding, 0.0)
# print('promoter state - {}: {}'.format(binding, affinity))
vector[index] = affinity
return vector
[docs] def chromosome_config(self, chromosome_states):
return dict(
chromosome_states,
sequence=self.sequence,
promoters=self.templates,
promoter_order=self.promoter_order,
genes=self.genes)
[docs] def ports_schema(self):
schema = {}
schema['chromosome'] = {
'rnap_id': {
'_default': 1,
'_updater': 'set'},
'root_domain': {
'_default': 0,
'_updater': 'set'},
'domains': {
'*': {
'id': {
'_default': 1,
'_updater': 'set'},
'lead': {
'_default': 0,
'_updater': 'set'},
'lag': {
'_default': 0,
'_updater': 'set'},
'children': {
'_default': [],
'_updater': 'set'}}},
'rnaps': {
'*': {
'id': {
'_default': -1,
'_updater': 'set'},
'domain': {
'_default': 0,
'_updater': 'set'},
'state': {
'_default': None,
'_updater': 'set',
'_emit': True},
'position': {
'_default': 0,
'_updater': 'set',
'_emit': True},
'template': {
'_default': None,
'_updater': 'set',
'_emit': True},
'template_index': {
'_default': 0,
'_updater': 'set',
'_emit': True},
'terminator': {
'_default': 0,
'_updater': 'set',
'_emit': True}}}}
initial_domains = {
id: {
'id': {
'_default': id,
'_updater': 'set'},
'lead': {
'_default': 0,
'_updater': 'set'},
'lag': {
'_default': 0,
'_updater': 'set'},
'children': {
'_default': [],
'_updater': 'set'}}
for id, domain in self.initial_domains.items()}
schema['chromosome']['domains'].update(initial_domains)
schema['molecules'] = {
molecule: {
'_default': 0,
'_divider': 'split',
'_emit': True}
for molecule in self.molecule_ids}
schema['factors'] = {
factor: {
'_default': 0.0,
'_divider': 'split'}
for factor in self.transcription_factors}
schema['transcripts'] = {
protein: {
'_default': 0,
'_divider': 'split',
'_emit': True}
for protein in self.transcript_ids}
schema['proteins'] = {
protein: {
'_default': 0,
'_divider': 'split',
'_emit': True}
for protein in self.protein_ids}
schema['global'] = {}
return schema
[docs] def derivers(self):
return {
self.concentrations_deriver_key: {
'deriver': 'concentrations_deriver',
'port_mapping': {
'global': 'global',
'counts': 'proteins',
'concentrations': 'factors'},
'config': {
'concentration_keys': self.transcription_factors}}}
[docs] def next_update(self, timestep, states):
chromosome_state = states['chromosome']
# chromosome_state['rnaps'] = list(chromosome_state['rnaps'].values())
original_rnap_keys = [
rnap['id'] for rnap in chromosome_state['rnaps'].values()]
chromosome = Chromosome(
self.chromosome_config(chromosome_state))
molecules = states['molecules']
proteins = states['proteins']
factors = states['factors'] # as concentrations
promoter_rnaps = chromosome.promoter_rnaps()
promoter_domains = chromosome.promoter_domains()
# Find out how many promoters are currently blocked by a
# newly initiated or occluding rnap
promoter_count = len(chromosome.promoter_order)
blocked_promoters = np.zeros(promoter_count, dtype=np.int64)
open_domains = {}
bound_domains = {}
for promoter_index, promoter_key in enumerate(chromosome.promoter_order):
domains = []
for rnap in promoter_rnaps.get(promoter_key, {}).values():
if rnap.is_occluding():
domains.append(rnap.domain)
blocked_promoters[promoter_index] += 1
bound_domains[promoter_key] = set(domains)
open_domains[promoter_key] = promoter_domains[promoter_key] - bound_domains[promoter_key]
blocked_promoters = np.array(blocked_promoters)
# Make the state for a gillespie simulation out of total number of each
# promoter by copy number not blocked by initiated rnap,
# concatenated with the number of each promoter that is bound by rnap.
# These are the two states for each promoter the simulation
# will operate on, essentially going back and forth between
# bound and unbound states.
copy_numbers = chromosome.promoter_copy_numbers()
original_unbound_rnaps = proteins[UNBOUND_RNAP_KEY]
monomer_limits = {
monomer: molecules[monomer]
for monomer in self.monomer_ids}
unbound_rnaps = original_unbound_rnaps
time = 0
now = 0
elongation = Elongation(
self.sequences,
chromosome.promoters,
monomer_limits,
self.symbol_to_monomer,
self.elongation)
initiation_affinity = self.build_affinity_vector(chromosome.promoters, factors)
while time < timestep:
# build the state vector for the gillespie simulation
substrate = np.concatenate([
copy_numbers - blocked_promoters,
blocked_promoters,
[unbound_rnaps]])
log.debug('transcription substrate: {}'.format(substrate))
log.debug('blocked promoters: {}'.format(blocked_promoters))
# find number of monomers until next terminator
distance = 1 / self.elongation_rate # chromosome.terminator_distance()
# find interval of time that elongates to the point of the next terminator
interval = min(distance, timestep - time)
if interval == distance:
# perform the elongation until the next event
terminations, monomer_limits, chromosome.rnaps = elongation.step(
interval,
monomer_limits,
chromosome.rnaps)
unbound_rnaps += terminations
else:
elongation.store_partial(interval)
terminations = 0
log.debug('time: {} --- interval: {}'.format(time, interval))
log.debug('monomer limits: {}'.format(monomer_limits))
log.debug('terminations: {}'.format(terminations))
# run simulation for interval of time to next terminator
result = self.initiation.evolve(
interval,
substrate,
initiation_affinity)
log.debug('result: {}'.format(result))
# perform binding
for now, event in zip(result['time'], result['events']):
# RNAP has bound the promoter
promoter_key = chromosome.promoter_order[event]
promoter = chromosome.promoters[promoter_key]
domains = open_domains[promoter_key]
domain = choose_element(domains)
blocked_promoters[event] += 1
bound_domains[promoter_key].add(domain)
open_domains[promoter_key].remove(domain)
# create a new bound RNAP and add it to the chromosome.
new_rnap = chromosome.bind_rnap(event, domain)
new_rnap.start_polymerizing()
log.debug('newly bound RNAP: {}'.format(new_rnap))
unbound_rnaps -= 1
# deal with occluding rnap
for rnap in chromosome.rnaps.values():
if rnap.is_unoccluding(self.polymerase_occlusion):
log.debug('RNAP unoccluding: {}'.format(rnap))
blocked_promoters[rnap.template_index] -= 1
bound_domains[rnap.template].remove(rnap.domain)
open_domains[rnap.template].add(rnap.domain)
rnap.unocclude()
log.debug('rnap: {}'.format(rnap))
log.debug('complete: {}'.format(elongation.complete_polymers))
time += interval
# track how far elongation proceeded to start from next iteration
self.elongation = elongation.elongation - int(elongation.elongation)
proteins = {
UNBOUND_RNAP_KEY: unbound_rnaps - original_unbound_rnaps}
molecules = {
key: count * -1
for key, count in elongation.monomers.items()}
# 1 ATP hydrolysis cost per nucleotide elongation
molecules['ATP'] = 0
molecules['ADP'] = 0
for count in elongation.monomers.values():
molecules['ATP'] -= count
molecules['ADP'] += count
chromosome_dict = chromosome.to_dict()
rnaps = chromosome_dict['rnaps']
original = set(original_rnap_keys)
current = set(rnaps.keys())
bound_rnaps = current - original
completed_rnaps = original - current
continuing_rnaps = original - completed_rnaps
rnap_updates = {
rnap_id: rnaps[rnap_id]
for rnap_id in continuing_rnaps}
add_rnaps = [
{'path': (bound,), 'state': rnaps[bound]}
for bound in bound_rnaps]
delete_rnaps = [
(completed,)
for completed in completed_rnaps]
rnap_updates['_add'] = add_rnaps
rnap_updates['_delete'] = delete_rnaps
chromosome_dict['rnaps'] = rnap_updates
update = {
'chromosome': {
key: chromosome_dict[key]
for key in self.chromosome_ports},
'proteins': proteins,
'molecules': molecules,
'transcripts': elongation.complete_polymers}
log.debug('molecules update: {}'.format(update['molecules']))
return update
[docs]def test_transcription():
parameters = {
'sequence': toy_chromosome_config['sequence'],
'templates': toy_chromosome_config['promoters'],
'genes': toy_chromosome_config['genes'],
'promoter_affinities': toy_chromosome_config['promoter_affinities'],
'transcription_factors': ['tfA', 'tfB'],
'elongation_rate': 10.0}
chromosome = Chromosome(toy_chromosome_config)
transcription = Transcription(parameters)
initial_molecules = {
nucleotide: 10
for nucleotide in transcription.monomer_ids}
initial_molecules['ATP'] = 100000
experiment = process_in_experiment(transcription, {
'initial_state': {
'chromosome': chromosome.to_dict(),
'molecules': initial_molecules,
'proteins': {UNBOUND_RNAP_KEY: 10},
'factors': {'tfA': 0.2, 'tfB': 0.7}}})
pp(experiment.state.get_value())
experiment.update(10.0)
pp(experiment.state.get_value())
print('complete!')
if __name__ == '__main__':
test_transcription()