Source code for gavo.utils.parsetricks
"""
A wrapper around pyparsing. We need this because of the various
idiosyncrasies pyparsing has had over the years, and also because pyparsing
is not terribly well suited for the multi-grammar situation we have here.
Hence, whenever you use pyparsing in DaCHS: Use parsetricks instead of
pyparsing.
This is particularly important on older systems which may have pyparsing
versions older than 2.2. There are rather significant changes from 2.2 to
2.4, and we fall back to the bundled pyparsing (gavo.imp.pyparsing, which
is 2.4) if the installed pyparsing is older than 2.4.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
# Not checked by pyflakes: (effectively) API file with gratuitous imports
import contextlib
import os
import threading
if False:
from gavo.imp.pyparsing import *
else:
import pyparsing
if pyparsing.__version__<"2.4":
from gavo.imp.pyparsing import *
else:
from pyparsing import *
class MatchFirstWithSaneError(pyparsing.MatchFirst):
    """a MatchFirst that re-raises the error of the alternative that
    got furthest into the input.

    Upstream MatchFirst munges the error messages, which sometimes
    really is disastrous for us.  We monkeypatch this until
    https://github.com/pyparsing/pyparsing/issues/464 is somehow
    resolved.
    """

    def parseImpl(self, instring, loc, doActions=True):
        """returns the result of the first alternative in self.exprs that
        matches instring at loc.

        If none matches, the failure with the largest location is raised
        (the earliest one wins ties); without any alternatives, a generic
        ParseException is raised.
        """
        furthestLoc = -1
        furthestError = None

        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except pyparsing.ParseException as failure:
                if failure.loc > furthestLoc:
                    furthestError, furthestLoc = failure, failure.loc
            except IndexError:
                # the alternative ran off the end of the input; report that
                # as a parse failure located at the end of the string
                endOfInput = len(instring)
                if endOfInput > furthestLoc:
                    furthestError = pyparsing.ParseException(
                        instring, endOfInput, alternative.errmsg, self)
                    furthestLoc = endOfInput

        # we only get here if no alternative matched
        if furthestError is None:
            raise pyparsing.ParseException(
                instring, loc, "no defined alternatives to match", self)
        raise furthestError
####################### Pyparsing hacks
#
# (1) When building grammars, always do so using the pyparsingWhitechars
# context manager. Building grammars is thread-safe, but different
# grammars here use different whitespace conventions, so without
# the c.m., you might get those messed up.
#
# (2) When parsing strings, *always* go through pyparseString(grammar,
# string) and fellow functions whenever your code could run from within
# the server (i.e., basically always outside of tests).
# pyparsing is not thread-safe, and thus we'll need to shoehorn some
# locking on top of it; I don't want to change the pyparsing methods
# themselves since they may be called very frequently.
# Switch on pyparsing's packrat memoization globally; pyparseString below
# resets the resulting cache when it is done with a parse.
ParserElement.enablePackrat()
# Module-wide lock shared by pyparsingWhitechars (grammar compilation) and
# pyparseString (parsing).  It is an RLock, so a thread that already holds
# it can acquire it again without blocking on itself.
_PYPARSE_LOCK = threading.RLock()
@contextlib.contextmanager
def pyparsingWhitechars(whiteChars):
    """a context manager that serializes pyparsing grammar compilation
    and manages its whitespace chars.

    We need different whitespace definitions in some parts of DaCHS.
    (The default used to be " \\t" for a while, so this is what things
    get reset to).

    Since whitespace apparently can only be set globally for pyparsing,
    we provide this c.m.  Since it is possible that grammars will be
    compiled in threads (e.g., as a side effect of getRD), this is
    protected by a lock.  This, in turn, means that this can
    potentially block for a fairly long time.

    Bottom line: When compiling pyparsing grammars, *always* set
    the whitespace chars explicitly, and do it through this c.m.

    whiteChars is handed on to ParserElement.setDefaultWhitespaceChars.
    On exit -- regular or through an exception, including one raised
    while setting whiteChars -- the default is put back to " \\t" and
    the lock is released.
    """
    with _PYPARSE_LOCK:
        try:
            # this is inside the try block so that a bad whiteChars value
            # cannot leave the lock held or the default in an odd state.
            ParserElement.setDefaultWhitespaceChars(whiteChars)
            yield
        finally:
            ParserElement.setDefaultWhitespaceChars(" \t")
def pyparseString(grammar, string, **kwargs):
    """parses a string using a pyparsing grammar thread-safely.

    grammar is the pyparsing expression to parse with, string the text
    to parse; kwargs are handed through to grammar.parseString.  This
    returns whatever parseString returns; exceptions raised by it
    propagate to the caller.

    The packrat cache is reset in any case, so a failed parse does not
    keep its cache entries (and with them the input string) around.
    """
    with _PYPARSE_LOCK:
        try:
            return grammar.parseString(string, **kwargs)
        finally:
            # reset while we are still holding the lock
            ParserElement.resetCache()