Source code for goenrich.obo
import itertools
import networkx as nx
def _tokenize(f):
token = []
for line in f:
if line == '\n':
yield token
token = []
else:
token.append(line)
def _filter_terms(tokens):
for token in tokens:
if token[0] == '[Term]\n':
yield token[1:]
def _parse_terms(terms):
for term in terms:
obsolete = False
node = {}
parents = []
for line in term:
if line.startswith('id:'):
id = line[4:-1]
elif line.startswith('name:'):
node['name'] = line[6:-1]
elif line.startswith('namespace:'):
node['namespace'] = line[11:-1]
elif line.startswith('is_a:'):
parents.append(line[6:16])
elif line.startswith('relationship: part_of'):
parents.append(line[22:32])
elif line.startswith('is_obsolete'):
obsolete = True
break
if not obsolete:
edges = [(p, id) for p in parents] # will reverse edges later
yield (id, node), edges
else:
continue
_filename = 'db/go-basic.obo'
[docs]def ontology(file):
""" read ontology from file
:param file: file path of file handle
"""
O = nx.DiGraph()
if isinstance(file, str):
f = open(file)
we_opened_file = True
else:
f = file
we_opened_file = False
try:
tokens = _tokenize(f)
terms = _filter_terms(tokens)
entries = _parse_terms(terms)
nodes, edges = zip(*entries)
O.add_nodes_from(nodes)
O.add_edges_from(itertools.chain.from_iterable(edges))
O.graph['roots'] = {data['name'] : n for n, data in O.node.items()
if data['name'] == data['namespace']}
finally:
if we_opened_file:
f.close()
for root in O.graph['roots'].values():
for n, depth in nx.shortest_path_length(O, root).items():
node = O.node[n]
node['depth'] = min(depth, node.get('depth', float('inf')))
return O.reverse()