Source code for plastid.util.io.binary

#!/usr/bin/env python
"""Tools for reading values from binary files

See Also
--------
:py:mod:`struct`
    Binary data structures in Python
"""
import numpy
import sys
import struct
from collections import namedtuple


class BinaryParserFactory(object):
    """Parser factory for different types of binary records.

    Creates parsers that unpack binary byte streams into dictionaries
    that match field names to values. These parsers are most useful as
    components of binary file readers.


    Parameters
    ----------
    name : str
        Name for parser

    fmt : str
        String specifying binary format of data. See :py:mod:`struct`

    fields : list
        Ordered list of field names to bind to data unpacked from binary file


    Attributes
    ----------
    name : str
        Human-readable name for parser

    fmt : str
        String specifying binary format of data, as specified in
        :py:mod:`struct`, *without* a byte-order prefix (the prefix is
        supplied per call)

    fields : list
        List of strings specifying variable names to bind to data
        when unpacked from a binary file, in same order as items in ``fmt``

    nt : type
        The :func:`~collections.namedtuple` class, built from `name` and
        `fields`, that provides names for the unpacked data


    Examples
    --------
    A binary RGB color parser::

        >>> ColorParser = BinaryParserFactory("ColorParser","3B",["r","g","b"])
        >>> fh = open("some_binary_file_containing_colors.bin","rb") # 'b' is important in mode flag!!
        >>> fh.seek(byte_location_of_an_rgb_color)
        >>> rgb_dict = ColorParser(fh) # read and parse 3 unsigned 8-bit integers from file
        >>> rgb_dict
            { "r" : 255,
              "g" : 0,
              "b" : 52 }


    See Also
    --------
    struct
        For information on format strings
    """

    def __init__(self, name, fmt, fields):
        """Create a |BinaryParserFactory|

        Parameters
        ----------
        name : str
            Name for parser

        fmt : str
            String specifying binary format of data. See :py:mod:`struct`

        fields : list
            Ordered list of field names to bind to data unpacked from binary file
        """
        self.name = name
        self.fmt = fmt
        self.fields = fields
        self.nt = namedtuple(name, fields)

    def __str__(self):
        return "<%s fmt='%s' fields='%s'>" % (self.name, self.fmt, ",".join(self.fields))

    def __repr__(self):
        return str(self)

    def __call__(self, fh, byte_order="<"):
        """Parse data from `fh` into a dictionary mapping field names to their values

        Exactly ``self.calcsize(byte_order)`` bytes are read from `fh`,
        starting at its current position.

        Parameters
        ----------
        fh : byte stream
            File-like pointing to binary data. Pointer in file must be
            aligned with start of record.

        byte_order : str
            Character indicating endian-ness of data (default: `'<'` for little-endian)

        Returns
        -------
        dict
            Dictionary mapping field names from `self.fields` to their values,
            in field order. This is whatever ``namedtuple._asdict()`` returns:
            a :py:class:`~collections.OrderedDict` on Python < 3.8, and a
            plain :py:class:`dict` afterwards. Byte-string values are decoded
            to native strings.

        Raises
        ------
        struct.error
            If fewer bytes than one full record could be read from `fh`

        UnicodeDecodeError
            If a byte-string field contains non-ASCII data
        """
        full_fmt = byte_order + self.fmt
        raw = fh.read(self.calcsize(byte_order))
        record = self.nt._make(struct.unpack(full_fmt, raw))._asdict()

        # Only values of existing keys are reassigned below, so the set of
        # keys is stable while we walk it.
        for key in self.nt._fields:
            if isinstance(record[key], bytes):
                # Python 3.x returns bytes
                # convert byte objects to strings
                record[key] = record[key].decode("ascii")
            if sys.version_info < (3, ) and isinstance(record[key], unicode):
                # Python 2.x: decoding above yields unicode
                # convert unicode back to native strings
                record[key] = str(record[key].decode("ascii"))

        return record

    def calcsize(self, byte_order="<"):
        """Return calculated size, in bytes, of record

        Parameters
        ----------
        byte_order : str
            Character indicating endian-ness of data (default: `'<'` for little-endian)

        Returns
        -------
        int
            Calculated size of record, in bytes
        """
        return struct.calcsize(byte_order + self.fmt)


def find_null_bytes(inp, null=b"\x00"):
    r"""Find all occurrences of a null character in a byte-formatted input string

    Parameters
    ----------
    inp : bytes (or str in Python 2.7)
        byte string

    null : bytes, optional
        Byte sequence to search for (default: the null byte ``b"\x00"``)

    Returns
    -------
    :py:class:`numpy.ndarray`
        numpy array of integers giving, in ascending order, every index at
        which `null` was found in `inp`. The array is empty, but still of
        integer dtype, if `null` does not occur.
    """
    indices = []
    last_found = inp.find(null)
    while last_found > -1:
        indices.append(last_found)
        # resume one position past the previous hit
        last_found = inp.find(null, 1 + last_found)

    # An explicit dtype is needed: numpy infers float64 for an empty list,
    # which would make the "no matches" result unusable as an index array.
    return numpy.array(indices, dtype=int)