Source code for gavo.grammars.binarygrammar
"""
A grammar reading from (fixed-record) binary files.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import re
import struct
from gavo import base
from gavo import utils
from gavo.grammars.common import Grammar, FileRowIterator
from gavo.utils import misctricks
from gavo.utils import parsetricks
class BinaryRowIterator(FileRowIterator):
    """A row iterator reading from binary files.

    Raw records are taken from the input file either as plain chunks of
    the record length or, when the grammar's armor is "fortran", through
    misctricks.iterFortranRecs.  Each raw record is unpacked with the
    struct format of the grammar's fieldDefs and yielded as a dictionary
    mapping field names to the unpacked values.
    """
    fileMode = "rb"

    def _iterUnarmoredRecords(self):
        """Yield successive chunks of recordLength bytes until end of file.

        A final chunk shorter than recordLength is yielded unchanged;
        unpacking it in _iterRows then fails and is reported from there.
        """
        # the record length does not change while reading, so look it
        # up once rather than for every record
        recordLength = self.grammar.fieldDefs.recordLength
        while True:
            data = self.inputFile.read(recordLength)
            if data==b"":
                return
            yield data

    def _iterInRecords(self):
        """Return an iterator over the raw (bytes) records of the input.

        The grammar's skipBytes leading bytes are read and discarded
        before the iterator is created.
        """
        self.inputFile.read(self.grammar.skipBytes)
        armor = self.grammar.armor
        if armor is None:
            return self._iterUnarmoredRecords()
        if armor=="fortran":
            return misctricks.iterFortranRecs(self.inputFile)
        # An explicit raise instead of "assert False": assert statements
        # are removed when python runs with -O, and this method would
        # then silently return None.
        raise AssertionError("Unknown armor: %r"%(armor,))

    def _iterRows(self):
        """Yield one dictionary (field name -> value) per input record.

        Any exception raised while reading or unpacking is logged through
        base.ui.logOldExc and re-raised as a base.SourceParseError that
        carries the current byte position of the input file.
        """
        fmtStr = self.grammar.fieldDefs.structFormat
        fieldNames = self.grammar.fieldDefs.fieldNames
        try:
            # compile the format once for all records
            unpack = struct.Struct(fmtStr).unpack
            for rawRec in self._iterInRecords():
                yield dict(zip(fieldNames, unpack(rawRec)))
        except Exception as ex:
            raise base.ui.logOldExc(base.SourceParseError(str(ex),
                location="byte %s"%self.inputFile.tell(),
                source=str(self.sourceToken)))
def _getFieldsGrammar():
    """Return a pyparsing grammar for the content of a binaryRecordDef.

    The grammar matches one or more fields of the form
    <identifier>(<formatCode>) up to the end of the string.  Each field
    is turned into a dictionary with the keys "identifier" and
    "formatCode" by a parse action.
    """
    with parsetricks.pyparsingWhitechars(" \n\t\r"):
        # The last character of the identifier pattern is dropped here
        # (presumably a trailing end-of-string anchor -- verify against
        # utils.identifierPattern).
        identifier = parsetricks.Regex(utils.identifierPattern.pattern[:-1]
            ).setName("identifier")
        # Raw string: "\d" in a plain string literal is an invalid escape
        # sequence and triggers a warning on current python versions.
        formatCode = parsetricks.Regex(r"\d+s|[bBhHiIqQfd]"
            ).setName("fieldSpec")
        field = ( identifier("identifier")
            + parsetricks.Suppress(parsetricks.Literal("("))
            + formatCode("formatCode")
            + parsetricks.Suppress(parsetricks.Literal(")"))).setParseAction(
                lambda s, p, t: dict(t))
        return parsetricks.OneOrMore(field)+parsetricks.StringEnd()
class BinaryRecordDef(base.Structure):
    """A definition of a binary record.

    A binary record consists of a number of binary fields, each of which
    is defined by a name and a format code.  The format codes supported
    here are a subset of what python's struct module supports.  The
    widths given below are for big, little, and packed binfmts.
    For native (which is the default), it depends on your platform.

    * <number>s -- <number> characters making up a string
    * b,B -- signed and unsigned byte (8 bit)
    * h,H -- signed and unsigned short (16 bit)
    * i,I -- signed and unsigned int (32 bit)
    * q,Q -- signed and unsigned long (64 bit)
    * f,d -- float and double.

    The content of this element gives the record structure in the format
    <name>(<code>){<whitespace><name>(<code>)} where <name> is a c-style
    identifier.
    """
    name_ = "binaryRecordDef"

    # built once at class creation and shared by all instances
    _fieldsGrammar = _getFieldsGrammar()

    _binfmt = base.EnumeratedUnicodeAttribute("binfmt",
        default="native",
        validValues=["big", "little", "native", "packed"],
        description="Binary format of the input data; big and little stand"
            " for msb first and lsb first, and"
            " packed is like native except no alignment takes place.")

    _fields = base.DataContent(description="The enumeration of"
        " the record fields.")

    # prefixes for struct format strings selecting byte order, size,
    # and alignment for each binfmt value
    _binfmtToStructCode = {
        "native": "",
        "packed": "=",
        "big": ">",
        "little": "<"}

    def completeElement(self, ctx):
        """Parse the element content and derive the record layout.

        On success, this sets structFormat (a format string for the
        struct module), recordLength (the size in bytes struct computes
        for that format), and fieldNames (a tuple of the field
        identifiers in declaration order).

        A base.LiteralParseError is raised when the content does not
        match the fields grammar.
        """
        try:
            parsedFields = utils.pyparseString(self._fieldsGrammar, self.content_)
        except parsetricks.ParseBaseException as ex:
            # XXX TODO: Position should probably be position during XML parse.
            # Fix when we have source positions on parsed elements.
            raise base.ui.logOldExc(base.LiteralParseError("binaryRecordDef",
                re.sub(r"\s+", " ", self.content_),
                pos=str(ex.loc), hint="The parser said: '%s'"%str(ex)))
        self.structFormat = (self._binfmtToStructCode[self.binfmt]
            + "".join(f["formatCode"] for f in parsedFields))
        self.recordLength = struct.calcsize(self.structFormat)
        self.fieldNames = tuple(f["identifier"] for f in parsedFields)
        super().completeElement(ctx)
class BinaryGrammar(Grammar):
    """A grammar that builds rowdicts from binary data.

    The grammar expects the input to be in fixed-length records.
    The actual specification of the fields is done via a binaryRecordDef
    element.
    """
    name_ = "binaryGrammar"
    rowIterator = BinaryRowIterator

    # leading bytes that are discarded before the first record
    _til = base.IntAttribute(
        "skipBytes",
        default=0,
        description="Number of bytes to skip before parsing records.")

    # the record layout, given as a binaryRecordDef child
    _fdefs = base.StructAttribute(
        "fieldDefs",
        description="Definition of the record.",
        childFactory=BinaryRecordDef)

    # how records are wrapped within the file
    _armoring = base.EnumeratedUnicodeAttribute(
        "armor",
        default=None,
        validValues=["fortran"],
        description=(
            "Record armoring; by default it's None meaning the data was dumped"
            " to the file sequentially. Set it to fortran for fortran unformatted"
            " files (4 byte length before and after the payload)."))