# Source code for goenrich.obo

import itertools
import networkx as nx

def _tokenize(f):
    token = []
    for line in f:
        if line == '\n':
            yield token
            token = []
        else:
            token.append(line)

def _filter_terms(tokens):
    for token in tokens:
        if token[0] == '[Term]\n':
            yield token[1:]

def _parse_terms(terms):
    for term in terms:
        obsolete = False
        node = {}
        parents = []
        for line in term:
            if line.startswith('id:'):
                id = line[4:-1]
            elif line.startswith('name:'):
                node['name'] = line[6:-1]
            elif line.startswith('namespace:'):
                node['namespace'] = line[11:-1]
            elif line.startswith('is_a:'):
                parents.append(line[6:16])
            elif line.startswith('relationship: part_of'):
                parents.append(line[22:32])
            elif line.startswith('is_obsolete'):
                obsolete = True
                break
        if not obsolete:
            edges = [(p, id) for p in parents] # will reverse edges later
            yield (id, node), edges
        else:
            continue

# NOTE(review): not referenced anywhere in the visible code; presumably the
# default relative path of the GO "basic" OBO file -- confirm against callers.
_filename = 'db/go-basic.obo'

def ontology(file):
    """Read an ontology from an OBO file into a directed graph.

    Every non-obsolete term becomes a node carrying the attributes parsed
    from its stanza plus a ``'depth'`` attribute (the length of the shortest
    path from any root) when it is reachable from a root.  A root is a term
    whose name equals its namespace; the mapping ``root name -> root id`` is
    stored in ``graph['roots']``.

    :param file: file path or file handle
    :returns: ``networkx.DiGraph`` whose edges point from a term to its
        parents
    """
    O = nx.DiGraph()

    if isinstance(file, str):
        f = open(file)
        we_opened_file = True
    else:
        f = file
        we_opened_file = False

    try:
        tokens = _tokenize(f)
        terms = _filter_terms(tokens)
        # Materialize so that a file without any term yields an empty graph
        # instead of failing while unpacking.
        entries = list(_parse_terms(terms))
    finally:
        # Only close handles opened here; a caller-supplied handle stays
        # under the caller's control.
        if we_opened_file:
            f.close()

    O.add_nodes_from(node for node, _ in entries)
    O.add_edges_from(
        itertools.chain.from_iterable(edges for _, edges in entries))

    # ``Graph.node`` was removed in networkx 2.4, whereas
    # ``nodes(data=True)`` yields ``(node, attribute dict)`` pairs in every
    # networkx version and hands out the live attribute dictionaries.
    attributes = dict(O.nodes(data=True))

    # Nodes created implicitly by an edge carry no attributes, hence the
    # explicit membership test instead of plain indexing.
    O.graph['roots'] = {
        data['name']: n
        for n, data in attributes.items()
        if 'name' in data and data['name'] == data.get('namespace')
    }

    # Edges still point from parent to child here, so path lengths from a
    # root are depths.
    for root in O.graph['roots'].values():
        for n, depth in nx.shortest_path_length(O, root).items():
            data = attributes[n]
            data['depth'] = min(depth, data.get('depth', float('inf')))

    return O.reverse()