genome
Genome Objects
class Genome()
__init__
def __init__(genome_fasta,
gtf,
name,
db_conn,
description=None,
transcriptome_fasta=None,
standalone=False,
proteome_fasta=None,
create=True)
Arguments:
gtf_path: Path to the GTF file
genome_fasta: Path to the genome FASTA file
transcriptome_fasta: Path to the transcriptome FASTA file
proteome_fasta: Path to the proteome FASTA file
db_conn: Database connection object; this is a SQLAlchemy engine
taxon_id: Taxon id of the genome
genes
def genes(gene_ids=None, range=None, ignore_strand=True)
Return genes by gene id or by range. If a range is provided, the genes in that range are returned, depending on the overlap type.
Arguments:
id: Gene id, as used in the GTF file
range: A GenomicRange object
ignore_strand: Whether to ignore strand; if true, all the genes in the range are returned regardless of strand
Returns:
a GenomicRangesDict object containing the genes; each key is the gene name and each value is a GenomicRange object
transcripts
def transcripts(gene_ids=None,
ids=None,
range=None,
ignore_strand=True,
group_by_gene=True)
return transcripts by gene id, transcript id or range
Arguments:
gene_ids: return transcripts for these gene ids
ids: return transcripts with these transcript ids
range: return transcripts in this range
ignore_strand: ignore strand when searching by range
group_by_gene: whether to group the returned transcripts by gene id. If true, the returned object will have gene ids as keys and GenomicRangesList as values; if false, the returned object will have transcript ids as keys and GenomicRange as values
Returns:
a genomic ranges dict object
exons
def exons(transcript_ids=None,
range=None,
group_by_transcript=True,
ignore_strand=True)
Same as genes, but the search is by transcript rather than by gene. If you do not know the transcript id, search for it with transcripts first.
Arguments:
transcript_id: return all exons for this transcript id
id: return exon with this id
range: return exons in this range
group_by_transcript: whether to group the returned exons by transcript id; if true, the returned object will have transcript ids as keys
ignore_strand: whether to ignore strand when searching by range
Returns:
a genomic ranges dict object, if not grouped by transcript the keys will be exon ids otherwise the keys will be transcript ids
coding
def coding(transcript_ids=None,
range=None,
group_by_transcript=True,
ignore_strand=True)
Same as exons, but returns all the coding sequences for a transcript or a list of transcripts.
Arguments:
transcript_id: return all coding sequences for this transcript id
id: return coding sequence with this id
range: return coding sequences in this range
group_by_transcript: whether to group the returned coding sequences by transcript id; if true, the returned object will have transcript ids as keys
Returns:
a genomic ranges dict object, if not grouped by transcript the keys will be coding sequence ids otherwise the keys will be transcript ids
three_utr
def three_utr(transcript_ids=None, range=None, ignore_strand=True)
return all the 3’ utrs for a transcript or a list of transcripts
Arguments:
transcript_ids: return 3’ utrs for these transcript ids
range: return 3’ utrs in this range
ignore_strand: return utrs regardless of strand
Returns:
a genomic ranges dict object with transcript ids as keys and GenomicRangesList as values. The utrs are not described as separate exons; if a utr spans multiple exons, those exons are merged into one. Additionally, if the utr ends in the middle of an exon, the returned utr ends there.
five_utr
def five_utr(transcript_ids=None, ids=None, range=None, ignore_strand=True)
return all the 5’ utrs for a transcript or a list of transcripts
Arguments:
transcript_ids: return 5’ utrs for these transcript ids
range: return 5’ utrs in this range
ignore_strand: return utrs regardless of strand
Returns:
a genomic ranges dict object with transcript ids as keys and GenomicRangesList as values. The utrs are not described as separate exons; if a utr spans multiple exons, those exons are merged into one. Additionally, if the utr ends in the middle of an exon, the returned utr ends there.
introns
def introns(transcript_ids=None,
ids=None,
range=None,
group_by_transcript=True,
ignore_strand=True)
return all the introns for a transcript or a list of transcripts
Arguments:
transcript_id: return introns for this transcript id
id: return intron with this id (introns are usually not described in a GTF, so this id may not be very useful since it is an auto-incremented id)
range: return introns in this range
group_by_transcript: return introns grouped by transcript
ignore_strand: whether to ignore strand when searching by range
Returns:
a genomic ranges dict object, if not grouped by transcript the keys will be intron ids otherwise the keys will be transcript ids
get_sequence
def get_sequence(genomic_range, type='genome')
Get the sequence of a genomic range. This takes a single genomic range; to get several sequences, iterate over a GenomicRangeList or GenomicRangeDict and call this for each range.
Arguments:
genomic_range: GenomicRange object
Returns:
sequence as string
add_annotation
def add_annotation(table, row_id, annots)
add arbitrary annotations as a dictionary to a specific row in a specific table
Arguments:
table: which table to add the annotations to
id: which row id to add the annotations to; this is the database internal id, not the gene_id or transcript_id (those ids can be found in the annotations of each row)
annots: a dictionary of annotations to add
Returns:
None but the database will be updated