Source code for goenrich.read

"""
parsers for different go-annotation formats
"""
import pandas as pd
GENE_ASSOCIATION_COLUMNS = ('db', 'db_object_id', 'db_object_symbol',
                            'qualifier', 'go_id', 'db_reference',
                            'evidence_code', 'with_from', 'aspect',
                            'db_object_name', 'db_object_synonym',
                            'db_object_type', 'taxon', 'date', 'assigned_by',
                            'annotation_extension', 'gene_product_form_id')
EXPERIMENTAL_EVIDENCE = ('EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP')
[docs]def goa(filename, experimental=True, **kwds): """ read go-annotation file :param filename: protein or gene identifier column :param experimental: use only experimentally validated annotations """ defaults = {'comment' : '!', 'names': GENE_ASSOCIATION_COLUMNS} if experimental and 'usecols' in kwds: kwds['usecols'] += ('evidence_code', ) defaults.update(kwds) result = pd.read_table(filename, **defaults) if experimental: retain_mask = result.evidence_code.isin(EXPERIMENTAL_EVIDENCE) result.drop(result.index[~retain_mask], inplace=True) return result
[docs]def sgd(filename, experimental=False, **kwds): """ read yeast genome database go-annotation file :param filename: protein or gene identifier column :param experimental: use only experimentally validated annotations """ return goa(filename, experimental, **kwds)
GENE2GO_COLUMNS = ('tax_id', 'GeneID', 'GO_ID', 'Evidence', 'Qualifier', 'GO_term', 'PubMed', 'Category')
[docs]def gene2go(filename, experimental=False, tax_id=9606, **kwds): """ read go-annotation file :param filename: protein or gene identifier column :param experimental: use only experimentally validated annotations :param tax_id: filter according to taxon """ defaults = {'comment': '#', 'names': GENE2GO_COLUMNS} defaults.update(kwds) result = pd.read_table(filename, **defaults) retain_mask = result.tax_id == tax_id result.drop(result.index[~retain_mask], inplace=True) if experimental: retain_mask = result.Evidence.isin(EXPERIMENTAL_EVIDENCE) result.drop(result.index[~retain_mask], inplace=True) return result