Source code for vivarium.compartments.gene_expression

from __future__ import absolute_import, division, print_function

import os
import copy

import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import networkx as nx

from vivarium.core.process import Generator
from vivarium.core.composition import (
    COMPARTMENT_OUT_DIR,
    simulate_compartment_in_experiment,
    plot_simulation_output,
)
from vivarium.library.make_network import save_network
from vivarium.library.units import units
from vivarium.library.dict_utils import deep_merge

# processes
from vivarium.data.amino_acids import amino_acids
from vivarium.processes.tree_mass import TreeMass
from vivarium.processes.transcription import Transcription, UNBOUND_RNAP_KEY
from vivarium.processes.translation import Translation, UNBOUND_RIBOSOME_KEY
from vivarium.processes.degradation import RnaDegradation
from vivarium.processes.complexation import Complexation
from vivarium.processes.division_volume import DivisionVolume
from vivarium.data.amino_acids import amino_acids
from vivarium.data.nucleotides import nucleotides
from vivarium.states.chromosome import toy_chromosome_config


NAME = 'gene_expression'

[docs]class GeneExpression(Generator):

    defaults = {
        'global_path': ('global',),
        'initial_mass': 1339.0 * units.fg,
        'time_step': 1.0,
        'transcription': {},
        'translation': {},
        'degradation': {},
        'complexation': {},
    }

    def __init__(self, config):
        super(GeneExpression, self).__init__(config)

[docs]    def generate_processes(self, config):
        transcription_config = config['transcription']
        translation_config = config['translation']
        degradation_config = config['degradation']
        complexation_config = config['complexation']

        # update timestep
        transcription_config.update({'time_step': config['time_step']})
        translation_config.update({'time_step': config['time_step']})
        degradation_config.update({'time_step': config['time_step']})
        complexation_config.update({'time_step': config['time_step']})

        # make the processes
        transcription = Transcription(transcription_config)
        translation = Translation(translation_config)
        degradation = RnaDegradation(degradation_config)
        complexation = Complexation(complexation_config)
        mass_deriver = TreeMass(config.get('mass_deriver', {
            'initial_mass': config['initial_mass']}))
        division = DivisionVolume(config)

        return {
            'mass_deriver': mass_deriver,
            'transcription': transcription,
            'translation': translation,
            'degradation': degradation,
            'complexation': complexation,
            'division': division}

[docs]    def generate_topology(self, config):
        global_path = config['global_path']

        return {
            'mass_deriver': {
                'global': global_path},

            'transcription': {
                'chromosome': ('chromosome',),
                'molecules': ('molecules',),
                'proteins': ('proteins',),
                'transcripts': ('transcripts',),
                'factors': ('concentrations',),
                'global': global_path},

            'translation': {
                'ribosomes': ('ribosomes',),
                'molecules': ('molecules',),
                'transcripts': ('transcripts',),
                'proteins': ('proteins',),
                'concentrations': ('concentrations',),
                'global': global_path},

            'degradation': {
                'transcripts': ('transcripts',),
                'proteins': ('proteins',),
                'molecules': ('molecules',),
                'global': global_path},

            'complexation': {
                'monomers': ('proteins',),
                'complexes': ('proteins',),
                'global': global_path},

            'division': {
                'global': global_path}}



# analysis
[docs]def gene_network_plot(data, out_dir, filename='gene_network'):
    '''
    Make a gene network plot from configuration data

    - data (dict):
        {
        'operons': operons, the "genes" in a chromosome config with {operon: [genes list]}
        'templates': promoters, the "promoters" in a chromosome config with {promoter: {sites: [], thresholds: []}}
        'complexes': complexes, stoichiometry from a complexation process.
        }

    '''
    node_size = 400
    node_distance = 30
    edge_weight = 1
    iterations = 1000

    operon_suffix = '_o'
    tf_suffix = '_tf'
    promoter_suffix = '_p'
    gene_suffix = '_g'
    complex_suffix = '_cxn'

    # plotting parameters
    color_legend = {
        'operon': [x/255 for x in [199,164,53]],
        'gene': [x / 255 for x in [181,99,206]],
        'promoter': [x / 255 for x in [110,196,86]],
        'transcription_factor': [x / 255 for x in [222,85,80]],
        'complex': [x / 255 for x in [153, 204, 255]],
    }

    # get data
    operons = data.get('operons', {})
    templates = data.get('templates', {})
    complexes = data.get('complexes', {})

    # make graph from templates
    G = nx.Graph()

    # initialize node lists for graphing
    operon_nodes = set()
    gene_nodes = set()
    promoter_nodes = set()
    tf_nodes = set()
    complex_nodes = set()

    edges = []

    # add operon --> gene connections
    for operon, genes in operons.items():
        operon_name = operon + operon_suffix
        gene_names = [gene + gene_suffix for gene in genes]

        operon_nodes.add(operon_name)
        gene_nodes.update(gene_names)

        G.add_node(operon_name)
        G.add_nodes_from(gene_names)

        # set node attributes
        operon_attrs = {operon_name: {'type': 'operon'}}
        gene_attrs = {gene: {'type': 'gene'} for gene in gene_names}
        nx.set_node_attributes(G, operon_attrs)
        nx.set_node_attributes(G, gene_attrs)

        # add operon --> gene edge
        for gene_name in gene_names:
            edge = (operon_name, gene_name)
            edges.append(edge)
            G.add_edge(operon_name, gene_name)

    # add transcription factor --> promoter --> operon connections
    for promoter, specs in templates.items():
        promoter_name = promoter + promoter_suffix

        promoter_nodes.add(promoter_name)
        G.add_node(promoter_name)

        # set node attributes
        promoter_attrs = {promoter_name: {'type': 'promoter'}}
        nx.set_node_attributes(G, promoter_attrs)

        # get sites and terminators
        promoter_sites = specs['sites']
        terminators = specs['terminators']

        # add transcription factors
        for site in promoter_sites:
            thresholds = site['thresholds']
            for threshold in thresholds:
                tf_name = threshold + tf_suffix

                tf_nodes.add(tf_name)
                G.add_node(tf_name)

                # set node attributes
                tf_attrs = {tf_name: {'type': 'transcription_factor'}}
                nx.set_node_attributes(G, tf_attrs)

                # connect transcription_factor --> promoter
                edge = (tf_name, promoter_name)
                edges.append(edge)
                G.add_edge(tf_name, promoter_name)

        # add gene products
        for site in terminators:
            products = site['products']
            for product in products:
                operon_name = product + operon_suffix

                operon_nodes.add(operon_name)
                G.add_node(operon_name)

                # set node attributes
                operon_attrs = {operon_name: {'type': 'operon'}}
                nx.set_node_attributes(G, operon_attrs)

                # connect promoter --> operon
                edge = (promoter_name, operon_name)
                edges.append(edge)
                G.add_edge(promoter_name, operon_name)


    ## add gene product --> complex
    # first get the sets of complexes and reactants.
    complex_set = set()
    monomer_set = set()
    for complex, stoichiometry in complexes.items():
        complex_list = [mol_id for mol_id, coeff in stoichiometry.items() if coeff>0]
        reactant_list = [mol_id for mol_id, coeff in stoichiometry.items() if coeff<0]

        complex_set.update(complex_list)
        monomer_set.update(reactant_list)

    # complexes are removed from reactants
    for complex in complex_set:
        if complex in monomer_set:
            monomer_set.remove(complex)

    # add nodes and edges
    for complex, stoichiometry in complexes.items():
        complexes = [mol_id for mol_id, coeff in stoichiometry.items() if coeff>0]
        reactants = {mol_id: coeff for mol_id, coeff in stoichiometry.items() if coeff<0}

        assert len(complexes) == 1, 'too many complexes'
        complex = complexes[0]
        complex_name = complex + complex_suffix

        complex_nodes.add(complex_name)
        G.add_node(complex_name)

        # set node attributes
        complex_attrs = {complex_name: {'type': 'complex'}}
        nx.set_node_attributes(G, complex_attrs)

        # make edge
        for reactant, coeff in reactants.items():

            # is this reactant a monomer or a complex?
            if reactant in monomer_set:
                # TODO -- check that it is actually included in the genes?
                reactant_name = reactant + gene_suffix
            elif reactant in complex_set:
                reactant_name = reactant + complex_suffix

            # connect reactant --> complex
            edge = (reactant_name, complex_name)
            edges.append(edge)
            G.add_edge(reactant_name, complex_name)

    # add edges between proteins/complexes and transcription factors that share the same name
    tf_names = [node.replace(tf_suffix, '') for node in tf_nodes]
    for node in gene_nodes:
        gene_name = node.replace(gene_suffix, '')
        if gene_name in tf_names:
            tf_node = gene_name + tf_suffix
            edge = (node, tf_node)
            edges.append(edge)
            G.add_edge(node, tf_node)

    for node in complex_nodes:
        complex_name = node.replace(complex_suffix, '')
        if complex_name in tf_names:
            tf_node = complex_name + tf_suffix
            edge = (node, tf_node)
            edges.append(edge)
            G.add_edge(node, tf_node)

    # separate out the disconnected graphs
    subgraphs = sorted(nx.connected_components(G), key = len, reverse=True)

    # make node labels by removing suffix
    node_labels = {}
    o_labels = {node: node.replace(operon_suffix,'') for node in operon_nodes}
    g_labels = {node: node.replace(gene_suffix, '') for node in gene_nodes}
    p_labels = {node: node.replace(promoter_suffix, '') for node in promoter_nodes}
    tf_labels = {node: node.replace(tf_suffix, '') for node in tf_nodes}
    cxn_labels = {node: node.replace(complex_suffix, '') for node in complex_nodes}
    node_labels.update(o_labels)
    node_labels.update(g_labels)
    node_labels.update(p_labels)
    node_labels.update(tf_labels)
    node_labels.update(cxn_labels)

    # save network for use in gephi
    gephi_nodes = {}
    gephi_edges = [[node1, node2, edge_weight] for (node1, node2) in edges]
    for node_id in operon_nodes:
        gephi_nodes[node_id] = {
            'label': o_labels[node_id],
            'type': 'operon',
            'size': 1}
    for node_id in gene_nodes:
        gephi_nodes[node_id] = {
            'label': g_labels[node_id],
            'type': 'gene',
            'size': 1}
    for node_id in promoter_nodes:
        gephi_nodes[node_id] = {
            'label': p_labels[node_id],
            'type': 'promoter',
            'size': 1}
    for node_id in tf_nodes:
        gephi_nodes[node_id] = {
            'label': tf_labels[node_id],
            'type': 'transcription factor',
            'size': 1}
    for node_id in complex_nodes:
        gephi_nodes[node_id] = {
            'label': cxn_labels[node_id],
            'type': 'complex',
            'size': 1}
    save_network(gephi_nodes, gephi_edges, out_dir)


    # plot
    n_rows = len(list(subgraphs))
    n_cols = 1
    fig = plt.figure(3, figsize=(6*n_cols, 6*n_rows))
    grid = plt.GridSpec(n_rows, n_cols, wspace=0.2, hspace=0.2)

    for idx, subgraph_nodes in enumerate(subgraphs):
        ax = fig.add_subplot(grid[idx, 0])

        subgraph = G.subgraph(list(subgraph_nodes))
        subgraph_node_labels = {node: node_labels[node] for node in subgraph_nodes}

        # get positions
        dist = node_distance / (len(subgraph)**0.5)
        # pos = nx.spring_layout(subgraph, k=dist, iterations=iterations)
        pos = nx.spring_layout(subgraph,
                               k=dist,
                               scale=20,
                               iterations=iterations)

        color_map = []
        for node in subgraph:
            type = subgraph.nodes[node]['type']
            node_color = color_legend[type]
            color_map.append(node_color)

        nx.draw(subgraph, pos, node_size=node_size, node_color=color_map)

        # edges
        # colors = list(range(1,len(edges)+1))
        nx.draw_networkx_edges(subgraph, pos,
                               # edge_color=colors,
                               width=1.5)

        nx.draw_networkx_labels(subgraph, pos,
                                labels=subgraph_node_labels,
                                font_size=8)

        if idx == 0:
            # make legend
            legend_elements = [Patch(facecolor=color, label=name) for name, color in color_legend.items()]
            plt.legend(handles=legend_elements, bbox_to_anchor=(1.5, 1.0))

    # save figure
    fig_path = os.path.join(out_dir, filename)
    plt.figure(3, figsize=(12, 12))
    plt.axis('off')
    plt.savefig(fig_path, bbox_inches='tight')
    plt.close()

[docs]def plot_gene_expression_output(timeseries, config, out_dir='out'):

    name = config.get('name', 'gene_expression')
    ports = config.get('ports', {})
    molecules = timeseries[ports['molecules']]
    transcripts = timeseries[ports['transcripts']]
    proteins = timeseries[ports['proteins']]
    time = timeseries['time']

    # make figure and plot
    n_cols = 1
    n_rows = 5
    plt.figure(figsize=(n_cols * 6, n_rows * 1.5))

    # define subplots
    ax1 = plt.subplot(n_rows, n_cols, 1)
    ax2 = plt.subplot(n_rows, n_cols, 2)
    ax3 = plt.subplot(n_rows, n_cols, 3)
    ax4 = plt.subplot(n_rows, n_cols, 4)
    ax5 = plt.subplot(n_rows, n_cols, 5)

    polymerase_ids = [
        UNBOUND_RNAP_KEY,
        UNBOUND_RIBOSOME_KEY]
    amino_acid_ids = list(amino_acids.values())
    nucleotide_ids = list(nucleotides.values())

    # plot polymerases
    for poly_id in polymerase_ids:
        ax1.plot(time, proteins[poly_id], label=poly_id)
    ax1.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    ax1.title.set_text('polymerases')

    # plot nucleotides
    for nuc_id in nucleotide_ids:
        ax2.plot(time, molecules[nuc_id], label=nuc_id)
    ax2.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    ax2.title.set_text('nucleotides')

    # plot molecules
    for aa_id in amino_acid_ids:
        ax3.plot(time, molecules[aa_id], label=aa_id)
    ax3.legend(loc='center left', bbox_to_anchor=(2.0, 0.5))
    ax3.title.set_text('amino acids')

    # plot transcripts
    for transcript_id, series in transcripts.items():
        ax4.plot(time, series, label=transcript_id)
    ax4.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    ax4.title.set_text('transcripts')

    # plot proteins
    for protein_id in sorted(proteins.keys()):
        if protein_id != UNBOUND_RIBOSOME_KEY:
            ax5.plot(time, proteins[protein_id], label=protein_id)
    ax5.legend(loc='center left', bbox_to_anchor=(1.5, 0.5))
    ax5.title.set_text('proteins')

    # adjust axes
    for axis in [ax1, ax2, ax3, ax4, ax5]:
        axis.spines['right'].set_visible(False)
        axis.spines['top'].set_visible(False)

    ax1.set_xticklabels([])
    ax2.set_xticklabels([])
    ax3.set_xticklabels([])
    ax4.set_xticklabels([])
    ax5.set_xlabel('time (s)', fontsize=12)

    # save figure
    fig_path = os.path.join(out_dir, name)
    plt.subplots_adjust(wspace=0.3, hspace=0.5)
    plt.savefig(fig_path, bbox_inches='tight')


# test
[docs]def run_gene_expression(total_time=10, out_dir='out'):
    timeseries = test_gene_expression(total_time)
    plot_settings = {
        'name': 'gene_expression',
        'ports': {
            'transcripts': 'transcripts',
            'molecules': 'molecules',
            'proteins': 'proteins'}}
    plot_gene_expression_output(timeseries, plot_settings, out_dir)

    sim_plot_settings = {'max_rows': 25}
    plot_simulation_output(timeseries, sim_plot_settings, out_dir)

[docs]def test_gene_expression(total_time=10):
    # load the compartment
    compartment_config = {
        'external_path': ('external',),
        'global_path': ('global',),
        'agents_path': ('..', '..', 'cells',),
        'transcription': {
            'sequence': toy_chromosome_config['sequence'],
            'templates': toy_chromosome_config['promoters'],
            'genes': toy_chromosome_config['genes'],
            'promoter_affinities': toy_chromosome_config['promoter_affinities'],
            'transcription_factors': ['tfA', 'tfB'],
            'elongation_rate': 10.0},
        # 'complexation': {
        #     'monomer_ids': [],
        #     'complex_ids': [],
        #     'stoichiometry': {}}
    }
    compartment = GeneExpression(compartment_config)

    molecules = {
        nt: 1000
        for nt in nucleotides.values()}
    molecules.update({
        aa: 1000
        for aa in amino_acids.values()})

    proteins = {
        polymerase: 100
        for polymerase in [
                UNBOUND_RNAP_KEY,
                UNBOUND_RIBOSOME_KEY]}

    proteins.update({
        factor: 1
        for factor in [
                'tfA',
                'tfB']})

    # simulate
    settings = {
        'timestep': 1,
        'total_time': total_time,
        'initial_state': {
            'proteins': proteins,
            'molecules': molecules}}
    return simulate_compartment_in_experiment(compartment, settings)


if __name__ == '__main__':
    out_dir = os.path.join(COMPARTMENT_OUT_DIR, NAME)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    run_gene_expression(600, out_dir)