"""
A grammar that just splits the source into input lines and then
lets you name character ranges.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
from gavo import base
from gavo import utils
from gavo.grammars.common import Grammar, FileRowIterator, FileRowAttributes
from gavo.utils import parsetricks
class SplitLineIterator(FileRowIterator):
    """A row iterator cutting each input line into named character ranges.

    The ranges are taken from the grammar's colRanges mapping.  The
    grammar's topIgnoredLines, commentIntroducer, and strip attributes
    decide which lines are skipped and whether the extracted values have
    surrounding whitespace removed.
    """

    def __init__(self, grammar, sourceToken, **kwargs):
        FileRowIterator.__init__(self, grammar, sourceToken, **kwargs)
        # self.inputFile is presumably set up by FileRowIterator -- it is
        # not assigned anywhere in this class.
        # Consume the header lines so _iterRows starts at the first data line.
        for _ in range(self.grammar.topIgnoredLines):
            self.inputFile.readline()
        # lineNo always is the (one-based) number of the line read last.
        self.lineNo = self.grammar.topIgnoredLines

    def _iterRows(self):
        """Yield one rowdict for each input line that is not a comment.

        The input file is closed as soon as iteration stops, whether the
        input was exhausted, parsing a line raised an exception, or the
        generator was closed before reaching the end of the input.
        """
        try:
            while True:
                self.lineNo += 1
                inputLine = self.inputFile.readline()
                if not inputLine:
                    # readline returns an empty value at end of file only
                    break

                if (self.grammar.commentIntroducer is not base.NotGiven
                        and inputLine.startswith(
                            self.grammar.commentIntroducer)):
                    continue

                yield self._parse(inputLine)
                # only lines actually delivered count as records
                self.recNo += 1
        finally:
            # do not leak the file handle on errors or early termination
            self.inputFile.close()

        # Drop the grammar reference on regular completion only, so it is
        # still available when an error is being handled.
        self.grammar = None

    def _parse(self, inputLine):
        """Return a dict mapping each colRanges key to its piece of inputLine.

        Slicing never raises IndexError, so a line shorter than a
        range yields a truncated or empty value rather than an error; the
        handler below can only fire if a colRanges value is a plain index
        instead of a slice.

        Raises base.SourceParseError when an IndexError occurs.
        """
        res = {}
        try:
            for key, colRange in self.grammar.colRanges.items():
                value = inputLine[colRange]
                res[key] = value.strip() if self.grammar.strip else value
        except IndexError:
            raise base.ui.logOldExc(base.SourceParseError("Short line", inputLine,
                self.getLocator(), self.sourceToken))
        return res

    def getLocator(self):
        """Return a string naming the number of the line read last."""
        return "line %d"%self.lineNo
def _parseColIndex(literal):
    """Return literal as a one-based column index.

    Raises ValueError if literal is not an integer or is smaller than one;
    without this check, an index of 0 would become a negative slice bound
    and silently count from the end of the line.
    """
    index = int(literal)
    if index < 1:
        raise ValueError("Column indices start at 1, got %d" % index)
    return index


class ColRangeAttribute(base.UnicodeAttribute):
    """A range of indices.

    Ranges can be specified as either <int1>-<int2>, just <int>
    (which is equivalent to <int>-<int>), or as half-open ranges
    (<int>- or -<int>).  Ranges are, contrary to
    python slices, inclusive on both sides, and start counting
    from one.
    """

    def parse(self, value):
        """Return the python slice corresponding to the range literal value.

        A slice passed in is returned unchanged.  Otherwise, the one-based,
        inclusive range is converted to a zero-based, half-open slice; a
        missing start or end becomes None in the slice.

        Raises base.LiteralParseError if value is not a valid range
        literal (non-integer parts, more than one dash, or an index
        smaller than one).
        """
        if isinstance(value, slice):
            # we're already parsed
            return value

        try:
            if "-" in value:
                # more than one dash makes the unpacking raise a ValueError,
                # which is reported like any other malformed literal
                startLit, endLit = value.split("-")
                start, end = None, None
                if startLit.strip():
                    # one-based inclusive start -> zero-based start
                    start = _parseColIndex(startLit)-1
                if endLit.strip():
                    # one-based inclusive end equals zero-based exclusive end
                    end = _parseColIndex(endLit)
                return slice(start, end)

            col = _parseColIndex(value)
            return slice(col-1, col)
        except ValueError:
            raise base.ui.logOldExc(
                base.LiteralParseError("colRanges", value, hint="A column range,"
                " (either int1-int2 or just an int) is expected here."))
class ColumnGrammar(Grammar, FileRowAttributes):
    """A grammar that builds rowdicts out of character index ranges.

    This works by using the colRanges attribute like <col key="mag">12-16</col>,
    which will take the characters 12 through 16 inclusive from each input
    line to build the input column mag.

    As a shortcut, you can also use the colDefs attribute; it contains
    a string of the form {<key>:<range>}, i.e.,
    a whitespace-separated list of colon-separated items of key and range
    as accepted by cols, e.g.::

        <colDefs>
            a: 3-4
            _u: 7
        </colDefs>
    """
    name_ = "columnGrammar"

    _til = base.IntAttribute(
        "topIgnoredLines",
        default=0,
        description="Skip this many lines at the top of each source file.",
        copyable=True)
    _cols = base.DictAttribute(
        "colRanges",
        description="Mapping of source keys to column ranges.",
        itemAttD=ColRangeAttribute("col"),
        copyable=True)
    _colDefs = base.ActionAttribute(
        "colDefs",
        description="Shortcut way of defining cols",
        methodName="_parseColDefs")
    _commentIntroducer = base.UnicodeAttribute(
        "commentIntroducer",
        default=base.NotGiven,
        description="A character sequence that, when found at the"
            " beginning of a line makes this line ignored",
        copyable=True)
    _strip = base.BooleanAttribute(
        "strip",
        default=True,
        description="Strip all parsed strings?",
        copyable=True)

    def _getColDefGrammar(self):
        """Return a pyparsing grammar for the content of colDefs.

        Each key: range clause is turned into a (key, range literal) pair
        by a parse action; the range literal is the combined text matched
        for the range, with the optional whitespace around the dash
        suppressed.
        """
        def makePair(s, p, t):
            # t is [key, ":", range literal]; the colon is of no interest
            return (t[0], t[2])

        with parsetricks.pyparsingWhitechars("\n\t\r "):
            number = parsetricks.Word(parsetricks.nums)
            # need to manually swallow whitespace after literals
            optWhite = parsetricks.Suppress(
                parsetricks.Optional(parsetricks.White()))
            dash = optWhite + parsetricks.Literal("-") + optWhite

            # -<int>
            openStart = dash + optWhite + number
            # <int>, <int>-, or <int>-<int>
            startGiven = number + parsetricks.Optional(
                dash + parsetricks.Optional(number))
            colRange = parsetricks.Combine(openStart | startGiven)
            colRange.setName("Column range")

            # the identifier pattern is used without its last character
            colKey = parsetricks.Regex(utils.identifierPattern.pattern[:-1])
            colKey.setName("Column key")

            colDef = colKey + parsetricks.Literal(":") + optWhite + colRange
            colDef = colDef.addParseAction(makePair)

            # for debugging, call setDebug(True) on colRange, colKey, colDef
            return parsetricks.ZeroOrMore(colDef) + parsetricks.StringEnd()

    def _parseColDefs(self, ctx):
        """Fill colRanges from the colDefs shortcut literal.

        This is the handler for the colDefs attribute.  It raises a
        base.LiteralParseError if colDefs does not match the grammar.
        """
        try:
            pairs = utils.pyparseString(self._getColDefGrammar(), self.colDefs)
            for colKey, rangeLiteral in pairs:
                self.colRanges[colKey] = self._cols.itemAttD.parse(rangeLiteral)
        except parsetricks.ParseException as ex:
            raise base.LiteralParseError("colDefs", self.colDefs,
                hint="colDefs is a whitespace-separated list of key:range pairs."
                " Your literal doesn't look like this, and here's what the"
                " parser had to complain: %s"%ex)

    rowIterator = SplitLineIterator