Source code for vivarium.data.knowledge_base

import os
import csv

from vivarium.data.spreadsheets import load_tsv

# Directory holding the flat TSV files: "data/flat" under the parent of the
# directory that contains this module.
FLAT_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "flat")

# Paths, relative to FLAT_DIR, of every TSV file loaded by KnowledgeBase.
# Top-level tables come first, followed by the tables in the "media"
# sub-directory (joined with the platform's path separator).
LIST_OF_FLAT_FILENAMES = (
    "wcEcoli_genes.tsv",
    "wcEcoli_proteins.tsv",
    "wcEcoli_environment_molecules.tsv",
    "timelines_def.tsv",
    "media_recipes.tsv",
) + tuple(
    os.path.join("media", media_file)
    for media_file in (
        "wcEcoli_base.tsv",
        "M9.tsv",
        "M9_GLC.tsv",
        "5X_supplement_EZ.tsv",
        "GLC_G6P.tsv",
        "GLC_LCT.tsv",
        "ecoli_core_GLC.tsv",
        "PURE_Fuji_2014.tsv",
        "PURE_Ueda_2010.tsv",
    )
)

class DataStore(object):
    """Empty attribute container.

    Instances start with no attributes of their own; KnowledgeBase attaches
    one per sub-directory of the flat-file tree and hangs the loaded tables
    off it as attributes.
    """

    def __init__(self):
        """Create a store with no attributes set."""
class KnowledgeBase(object):
    """In-memory view of the flat TSV knowledge-base files.

    On construction every file named in ``LIST_OF_FLAT_FILENAMES`` is read
    from ``FLAT_DIR`` and stored as a list of rows on an attribute named
    after the file (without its extension). Files inside a sub-directory are
    stored on a nested ``DataStore`` attribute named after that directory,
    e.g. ``media/M9.tsv`` becomes ``self.media.M9``.

    Two lookup tables are then derived:

    * ``genes``: rows of ``wcEcoli_genes`` keyed by their ``'symbol'`` field.
    * ``proteins``: rows of ``wcEcoli_proteins`` keyed by their ``'geneId'``
      field.
    """

    def __init__(self):
        # Load raw data from TSV files
        for filename in LIST_OF_FLAT_FILENAMES:
            self._load_tsv(FLAT_DIR, filename)

        # Index the raw rows for direct lookup; later rows win on duplicate keys.
        self.genes = {
            gene['symbol']: gene
            for gene in self.wcEcoli_genes}

        self.proteins = {
            protein['geneId']: protein
            for protein in self.wcEcoli_proteins}

    def _load_tsv(self, dir_name, file_name):
        """Load one TSV file and attach its rows to this object.

        Args:
            dir_name: Directory that ``file_name`` is relative to.
            file_name: Relative path of the TSV file, using ``os.path.sep``
                as the separator. Each leading directory component becomes
                a nested ``DataStore`` attribute (created on first use); the
                final component, cut at its first ``"."``, becomes the name
                of the attribute that receives the rows.
        """
        container = self
        steps = file_name.split(os.path.sep)

        # Walk (and lazily create) one DataStore per directory component.
        for sub_path in steps[:-1]:
            if not hasattr(container, sub_path):
                setattr(container, sub_path, DataStore())
            container = getattr(container, sub_path)

        # Everything before the first "." of the base name is the attribute
        # name. It need not be a valid identifier (e.g. "5X_supplement_EZ");
        # such attributes are only reachable through getattr().
        attr_name = steps[-1].split(".")[0]

        file_path = os.path.join(dir_name, file_name)
        # Materialise the rows so the attribute is a plain, re-iterable list.
        setattr(container, attr_name, list(load_tsv(file_path)))

    def concatenate_sequences(self, units):
        """Return the protein sequences for ``units`` joined in order.

        Args:
            units: Iterable of gene symbols (keys of ``self.genes``).

        Returns:
            The ``'seq'`` fields of the matching proteins concatenated into
            one string; an empty string when ``units`` is empty.

        Raises:
            KeyError: If a unit is not a known gene symbol, or the gene's
                ``'id'`` has no entry in ``self.proteins``.
        """
        # Each symbol resolves gene -> gene id -> protein -> sequence; join
        # once instead of growing a string piece by piece.
        return ''.join(
            self.proteins[self.genes[unit]['id']]['seq']
            for unit in units)