# Source code for gavo.grammars.kvgrammar
"""
A grammar parsing key-value pairs from plain text files.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import re
from gavo import base
from gavo.grammars.common import Grammar, FileRowIterator, MapKeys, REAttribute
class KVIterator(FileRowIterator):
    """An iterator over a file containing key, value pairs.

    Depending on the parent grammar's yieldPairs setting, it returns one
    {"key": ..., "value": ...} row per pair, or the whole set of pairs
    as a single row (after passing it through the grammar's mapKeys).
    """
    # Human-readable description of what is currently being processed;
    # this is what getLocator hands out.
    phase = "(nothing read yet)"

    def _iterRows(self):
        """Yield the rows parsed from self.inputFile.

        The whole input is read at once, everything matching the grammar's
        commentPattern is removed, and the rest is split into records
        with the grammar's recSplitter.  Records consisting of whitespace
        only are skipped.

        The input file is closed once all records have been processed,
        when a record cannot be parsed, and when the generator is
        abandoned before it is exhausted.

        Raises base.SourceParseError if the grammar's pairSplitter does not
        match a non-empty record.
        """
        grammar = self.grammar
        data = self.inputFile.read()
        self.phase = "(decoding)"
        completeRecord = {}
        data = re.sub(grammar.commentPattern, "", data)

        try:
            for index, rec in enumerate(grammar.recSplitter.split(data)):
                self.phase = "record %s %s"%(index, repr(rec))
                if not rec.strip():
                    continue

                # Only the actual splitting is guarded.  The yield below
                # must stay outside of this try block: a GeneratorExit (or
                # an exception thrown in by the consumer) arriving at the
                # yield must not be reported as a malformed record.
                try:
                    key, value = grammar.pairSplitter.match(rec).groups()
                except Exception:
                    # NOTE(review): logOldExc presumably logs the exception
                    # being handled and returns its argument -- confirm
                    # against base.ui.
                    raise base.ui.logOldExc(
                        base.SourceParseError(
                            "Not a key value pair: %s"%(repr(rec)),
                            source=self.inputFile.name))

                if grammar.yieldPairs:
                    yield {"key": key.strip(), "value": value.strip()}
                else:
                    completeRecord[key.strip()] = value.strip()
        finally:
            # Runs on normal completion, on parse errors, and when the
            # generator is closed early, so the file never stays open.
            self.inputFile.close()

        self.phase = "(postprocessing)"
        if not grammar.yieldPairs:
            yield grammar.mapKeys.doMap(completeRecord)

    def getLocator(self):
        """Return a string describing what is currently being processed."""
        return self.phase
class KeyValueGrammar(Grammar):
    """A grammar to parse key-value pairs from files.

    The default assumes one pair per line, with # comments and : or = as
    separating character.

    yieldPairs makes the grammar return an empty docdict
    and {"key":, "value":} rowdicts.

    Whitespace around key and value is ignored.
    """
    name_ = "keyValueGrammar"

    _kvSeps = base.UnicodeAttribute(
        "kvSeparators",
        default=":=",
        description="Characters accepted as separators between key and value")
    _pairSeps = base.UnicodeAttribute(
        "pairSeparators",
        default="\n",
        description="Characters accepted as separators between pairs")
    _cmtPat = REAttribute(
        "commentPattern",
        default=re.compile("(?m)#.*"),
        description="A regular expression describing comments.")
    _yieldPairs = base.BooleanAttribute(
        "yieldPairs",
        default=False,
        description="Yield key-value pairs instead of complete records?")
    _mapKeys = base.StructAttribute(
        "mapKeys",
        childFactory=MapKeys,
        default=None,
        description="Mappings to rename the keys coming from"
            " the source files. Use this, in particular, if the keys are"
            " not valid python identifiers.")

    rowIterator = KVIterator

    def onElementComplete(self):
        """Compile the record and pair splitters and default mapKeys.

        recSplitter splits the input on any character in pairSeparators;
        pairSplitter matches a key (a run of non-separator characters),
        one character from kvSeparators, and the rest of the line as value.
        If no mapKeys child was given, an empty MapKeys struct is created.
        """
        # The separator strings go into regex character classes verbatim,
        # so regex metacharacters in them are interpreted by re.
        kvChars = self.kvSeparators
        self.recSplitter = re.compile(f"[{self.pairSeparators}]")
        self.pairSplitter = re.compile(f"([^{kvChars}]+)[{kvChars}](.*)")

        if self.mapKeys is None:
            self.mapKeys = base.makeStruct(MapKeys)
        super().onElementComplete()