Source code for plastid.readers.bowtie

#!/usr/bin/env python
"""This module contains a parser for `bowtie`_'s legacy output format.
Functions in this module are are seldom used on their own, and rather are
accessed by |GenomeArray| or |SparseGenomeArray| when importing from
`bowtie`_ files.

See also
--------
|GenomeArray| and |SparseGenomeArray|
    Array-like objects that store and index quantitative data over genomes

http://bowtie-bio.sourceforge.net/manual.shtml#default-bowtie-output
    Detailed description of `bowtie`_ output format

:py:mod:`plastid.test.unit.genomics.test_genome_array`
    for integrative tests of these functions
"""
__author__ = "joshua"
__date__ = "2011-03-18"

from plastid.genomics.roitools import SegmentChain, GenomicSegment
from plastid.util.io.filters import AbstractReader

#===============================================================================
# INDEX: Readers for various bowtie1-like alignment file formats
#===============================================================================


[docs]class BowtieReader(AbstractReader): """Read alignments from `bowtie`_ files line-by-line into |SegmentChains|. The following attributes are defined and stored in the `attr` dict of each returned |SegmentChain| `seq_as_aligned` the sequence in the direction it aligns, NOT necessarily the read in the direction it was sequenced `qualstr_phred` a quality string, phred encoded `total_alignments` the number of total alignments found See description of `bowtie`_ legacy format at http://bowtie-bio.sourceforge.net/manual.shtml Parameters ---------- stream : file-like Stream of alignments in `bowtie`_'s legacy output format Yields ------- |SegmentChain| A read alignment """
[docs] def filter(self, line): """Parse a read alignment as |SegmentChain| from a line of `bowtie`_ output""" items = line.strip("\n").split("\t") read_name = items[0] strand = items[1] ref_seq = items[2] coord = int(items[3]) attr = { 'seq_as_aligned' : items[4], 'qualstr' : items[5], 'mismatch_str' : items[7], 'type' : "alignment", 'ID' : read_name, } # yapf: disable iv = GenomicSegment(ref_seq, coord, coord + len(attr['seq_as_aligned']), strand) feature = SegmentChain(iv, **attr) return feature