"""
Classes and methods to support vizier-type specifications on fields.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import datetime
import re
from gavo import base
from gavo import stc
from gavo import utils
from gavo.base import literals
from gavo.base import sqlmunge
from gavo.base import typesystems
from gavo.utils.parsetricks import (Word, Literal, Optional, Forward,
ZeroOrMore, Suppress, ParseException, StringEnd, Regex,
OneOrMore, CharsNotIn, pyparsingWhitechars)
class ParseNode(object):
"""is a parse node, consisting of an operator and children.
The parse trees returned by the various parse functions are built from
these.
This is an abstract class.
"""
def __init__(self, children, operator):
self.children = children
self.operator = operator
def __str__(self):
return "(%s %s)"%(self.operator, " ".join([str(c) for c in self.children]))
def __repr__(self):
return "(%r %r)"%(self.operator, " ".join([str(c) for c in self.children]))
def getSQLKey(self, name, item, sqlPars):
"""wraps base.getSQLKey and can be overridden in case operand
mangling is necessary.
Do not call base.getSQLKey directly from within ParseNodes.
"""
return base.getSQLKey(name, item, sqlPars)
def insertChild(self, index, field, sqlPars):
"""inserts children[index] into sqlPars with a unique key and returns
the key.
children[index] must be atomic (i.e., no ParseNode).
"""
item = self.children[index]
if item is None:
return None
assert not isinstance(item, ParseNode)
if getattr(field, "scaling", None):
item *= field.scaling
return self.getSQLKey(field.name, item, sqlPars)
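# For illustration (assumed behaviour, based on __str__ above): a numeric
# expression like "8 .. 12 | > 20" should render roughly as the prefix tree
#   (OR (.. 8 12) (> 20))
# i.e. each node carries its operator plus atomic or nested children.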
class NumericNode(ParseNode):
"""A node containing numeric operands (floats or dates).
"""
class DateNode(ParseNode):
"""A node containing date operands (datetime objects, as a rule).
As an extension to VizieR, we interpret floats as well, where
1000 .. 3000 is a Julian year, 10000 .. 100000 is an MJD, and
2000000 .. 4000000 is a JD.
"""
class MJDNode(DateNode):
def getSQLKey(self, name, item, sqlPars):
if (isinstance(item, datetime.datetime)
or isinstance(item, datetime.date)):
item = stc.dateTimeToMJD(item)
return base.getSQLKey(name, item, sqlPars)
class StringNode(ParseNode):
"""A node containing string operands.
"""
def _getNodeFactory(op, nodeClass):
def _(s, loc, toks):
return nodeClass(toks, op)
return _
def _makeNotNodeFactory(nodeClass):
def _makeNotNode(s, loc, toks):
if len(toks)==1:
return toks[0]
elif len(toks)==2:
return nodeClass(toks[1:], "NOT")
else: # Can't happen :-)
raise Exception("Busted by not")
return _makeNotNode
def makePmNode(s, loc, toks):
return NumericNode([toks[0]-toks[1], toks[0]+toks[1]], "..")
def makeDatePmNode(s, loc, toks):
"""returns a +/- node for dates, i.e., toks[1] is a float in days.
"""
days = datetime.timedelta(days=toks[1])
return DateNode([toks[0]-days, toks[0]+days], "..")
def makeMJDPmNode(s, loc, toks):
"""returns a +/- node for MJDs, i.e., toks[1] is a float in days, and
an MJDNode must be returned.
"""
days = datetime.timedelta(days=toks[1])
return MJDNode([toks[0]-days, toks[0]+days], "..")
def _getBinopFactory(op, nodeClass):
def _(s, loc, toks):
if len(toks)==1:
return toks[0]
else:
return nodeClass(toks, op)
return _
def _simpleExprFactory(nodeClass):
def _makeSimpleExprNode(s, loc, toks):
if len(toks)==1:
return nodeClass(toks[0:], "=")
else:
return nodeClass(toks[1:], toks[0])
return _makeSimpleExprNode
def getComplexGrammar(baseLiteral, pmBuilder, errorLiteral=None,
nodeClass=NumericNode):
"""returns the root element of a grammar parsing numeric vizier-like
expressions.
This is used for both dates and floats; use baseLiteral to match the
operand terminal. The trouble with dates is that the +/- operator
has a simple float as the second operand, which is why you can
pass in an errorLiteral and a pmBuilder.
"""
if errorLiteral is None:
errorLiteral = baseLiteral
with pyparsingWhitechars(" \t"):
preOp = Literal("=") | Literal(">=") | Literal(">"
) | Literal("<=") | Literal("<")
rangeOp = Literal("..")
pmOp = Literal("+/-") | Literal("\xb1")
orOp = Literal("|")
andOp = Literal("&")
notOp = Literal("!")
commaOp = Literal(",")
preopExpr = Optional(preOp) + baseLiteral
rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral
valList = baseLiteral + OneOrMore( Suppress(commaOp) + baseLiteral)
pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral
simpleExpr = rangeExpr | pmExpr | valList | preopExpr
expr = Forward()
notExpr = Optional(notOp) + simpleExpr
andExpr = notExpr + ZeroOrMore( Suppress(andOp) + notExpr )
orExpr = andExpr + ZeroOrMore( Suppress(orOp) + expr)
expr << orExpr
exprInString = expr + StringEnd()
rangeExpr.setName("rangeEx")
rangeOp.setName("rangeOp")
notExpr.setName("notEx")
andExpr.setName("andEx")
andOp.setName("&")
orExpr.setName("orEx")
expr.setName("expr")
simpleExpr.setName("simpleEx")
preopExpr.addParseAction(_simpleExprFactory(nodeClass))
rangeExpr.addParseAction(_getNodeFactory("..", nodeClass))
pmExpr.addParseAction(pmBuilder)
valList.addParseAction(_getNodeFactory(",", nodeClass))
notExpr.addParseAction(_makeNotNodeFactory(nodeClass))
andExpr.addParseAction(_getBinopFactory("AND", nodeClass))
orExpr.addParseAction(_getBinopFactory("OR", nodeClass))
return exprInString
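# Rough summary of the syntax this grammar accepts (the pyparsing definitions
# above are authoritative; this is just an illustration):
#   = 12   >= 12   > 12   <= 12   < 12     (preopExpr; bare "12" means "= 12")
#   10 .. 20                               (rangeExpr)
#   10 +/- 2                               (pmExpr; the ± sign works, too)
#   1, 2, 3                                (valList)
#   ! EXPR    EXPR & EXPR    EXPR | EXPR   (NOT, AND, OR)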
def parseFloat(s, pos, tok):
# If something looks like an int, return it as an int.
# Otherwise, postgres won't use int-indices
try:
return int(tok[0])
except ValueError:
return float(tok[0])
def parseDateTime(s, pos, tok):
"""returns a datetime from a date/time spec.
This can be an ISO string, or a Julian year, MJD, or JD told apart
by heuristics (see DateNode for details).
"""
assert len(tok)==1
try:
floatVal = float(tok[0])
if 1000<=floatVal<=3000:
return utils.roundToSeconds(stc.jYearToDateTime(floatVal))
elif 10000<=floatVal<=100000:
return utils.roundToSeconds(stc.mjdToDateTime(floatVal))
elif 2e6<=floatVal<=4e6:
return utils.roundToSeconds(stc.jdnToDateTime(floatVal))
else:
raise base.ParseException("Floats as dates must be"
" credible years (1000..3000), MJDs (1e4 .. 1e5), or"
" JDs (2e6 .. 4e6)")
except ValueError:
return literals.parseDefaultDatetime(tok[0])
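# Illustrative examples for the heuristics above (made up, not doctests from
# the original): "2010-05-03" is parsed as an ISO string, while bare numbers
# are read as a Julian year (e.g. 2010.5), an MJD (e.g. 55000), or a JD
# (e.g. 2455000.5), depending on their magnitude.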
floatLiteral = Regex(utils.floatRE).addParseAction(parseFloat)
_DATE_REGEX = r"\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\d(\.\d*)?)?|(\d+(\.\d*)?)"
_DATE_LITERAL_DT = Regex(_DATE_REGEX).addParseAction(parseDateTime)
def parseNumericExpr(str, baseSymbol=getComplexGrammar(floatLiteral,
makePmNode)):
"""returns a parse tree for vizier-like expressions over floats.
"""
return utils.pyparseString(baseSymbol, str)[0]
def parseDateExpr(str, baseSymbol=getComplexGrammar(_DATE_LITERAL_DT,
makeDatePmNode, floatLiteral, DateNode)):
"""returns a parse tree for vizier-like expressions over ISO dates.
Note that the semantic validity of the date (like, month<13) is not
checked by the grammar.
"""
return utils.pyparseString(baseSymbol, str)[0]
def parseDateExprToMJD(str, baseSymbol=getComplexGrammar(_DATE_LITERAL_DT,
makeMJDPmNode, floatLiteral, MJDNode)):
"""returns a parse tree for vizier-like expression of ISO dates with
parsed values in MJD.
"""
return utils.pyparseString(baseSymbol, str)[0]
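# Minimal usage sketch (illustrative; the exact tree shapes follow from the
# grammar above):
#   parseNumericExpr("8 .. 12")        -> NumericNode with operator ".."
#   parseDateExpr("2010-01-01 +/- 3")  -> DateNode spanning a six-day window
#   parseDateExprToMJD("55000")        -> MJDNode (operands bound as MJDs)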
def _makeOpNode(s, loc, toks):
return StringNode(toks[1:], toks[0])
def getStringGrammar():
"""returns a grammar for parsing vizier-like string expressions.
"""
# XXX TODO: should we cut at =| (which is currently parsed as = |)?
with pyparsingWhitechars(" \t"):
simpleOperator = Literal("==") | Literal("!=") | Literal(">=") |\
Literal(">") | Literal("<=") | Literal("<") | Literal("=~") |\
Literal("=,")
simpleOperand = Regex(r"[^\s].*|")
# XXX probably a bug in pyparsing: White shouldn't be necessary here
White = Word(" \t")
simpleExpr = simpleOperator + Optional( White ) + simpleOperand
commaOperand = Regex("[^,]+")
barOperand = Regex("[^|]+")
commaEnum = Literal("=,") + commaOperand + ZeroOrMore(
Suppress(",") + commaOperand)
exclusionEnum = Literal("!=,") + commaOperand + ZeroOrMore(
Suppress(",") + commaOperand)
barEnum = Literal("=|") + barOperand + ZeroOrMore(
Suppress("|") + barOperand)
enumExpr = exclusionEnum | commaEnum | barEnum
patLiterals = CharsNotIn("[*?")
wildStar = Literal("*")
wildQmark = Literal("?")
setElems = CharsNotIn("]")
setSpec = Suppress("[") + setElems + Suppress("]")
pattern = OneOrMore(setSpec | wildStar | wildQmark | patLiterals)
patternOperator = Literal("~") | Literal("=") | Literal("!~") |\
Literal("!")
patternExpr = patternOperator + Optional( White ) + pattern
nakedExpr = Regex("[^=!~|><]") + Optional( simpleOperand )
stringExpr = enumExpr | simpleExpr | patternExpr | nakedExpr
doc = stringExpr + StringEnd()
stringExpr.setName("StringExpr")
enumExpr.setName("EnumExpr")
simpleOperand.setName("Operand")
simpleOperator.setName("Operator")
nakedExpr.setName("SingleOperand")
debug = False
stringExpr.setDebug(debug)
enumExpr.setDebug(debug)
patLiterals.setDebug(debug)
simpleOperand.setDebug(debug)
simpleOperator.setDebug(debug)
nakedExpr.setDebug(debug)
simpleExpr.addParseAction(_makeOpNode)
patternExpr.addParseAction(_makeOpNode)
enumExpr.addParseAction(_makeOpNode)
makeDefaultExpr = _getNodeFactory("==", StringNode)
nakedExpr.addParseAction(lambda s,p,toks: makeDefaultExpr(s,p,
["".join(toks)]))
wildStar.addParseAction(_makeOpNode)
wildQmark.addParseAction(_makeOpNode)
setElems.addParseAction(_getNodeFactory("[", StringNode))
return doc
def parseStringExpr(str, baseSymbol=getStringGrammar()):
return utils.pyparseString(baseSymbol, str)[0]
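# Illustrative string expressions accepted by getStringGrammar (again, the
# grammar above is authoritative):
#   == NGC 4553        exact comparison (simpleExpr)
#   =, a, b, c         enumeration (commaEnum)
#   =| M 31 | M 33     enumeration (barEnum)
#   ~andro*            pattern match (patternExpr)
#   NGC 4553           bare operand, treated like "==" (nakedExpr)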
class _Flattener:
"""An abstract base class for things turning our parse trees into SQL.
Concrete derivations need to define a set _standardOperators containing the
normal binary operators for their types and a dictionary _sqlEmitters
containing functions returning SQL fragments; of course, they
can also completely override getSQLFor.
"""
def getSQLFor(self, node, field, sqlPars):
"""returns SQL for a parse node node over an InputKey field.
sqlPars is the dict that is later passed to conn.execute and
will be modified here.
"""
if node.operator in self._standardOperators:
return "{} {} %({})s".format(
field.name,
node.operator,
node.insertChild(0, field, sqlPars))
else:
return self._sqlEmitters[node.operator](
self, node, field, sqlPars)
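# Sketch of the flattening contract (illustrative; the %(...)s key names are
# produced by base.getSQLKey and will not literally look like this). Assuming
# an InputKey magKey for a column named mag:
#   sqlPars = {}
#   NumericFlattener().getSQLFor(parseNumericExpr("8 .. 12"), magKey, sqlPars)
#   # -> something like "mag BETWEEN %(mag0)s AND %(mag1)s", with sqlPars now
#   #    mapping those keys to 8 and 12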
class NumericFlattener(_Flattener):
"""A _Flattener for float- and int-valued columns.
"""
def _emitBinop(self, node, field, sqlPars):
return base.joinOperatorExpr(node.operator,
[self.getSQLFor(c, field, sqlPars) for c in node.children])
def _emitUnop(self, node, field, sqlPars):
operand = self.getSQLFor(node.children[0], field, sqlPars)
if operand:
return "{} ({})".format(node.operator, operand)
def _emitEnum(self, node, field, sqlPars):
return "{} IN ({})".format(
field.name,
", ".join("%({})s".format(node.insertChild(i, field, sqlPars))
for i in range(len(node.children))))
_standardOperators = set(["=", ">=", ">", "<=", "<"])
_sqlEmitters = {
'..': lambda self, node, field, sqlPars:
"{} BETWEEN %({})s AND %({})s".format(
field.name,
node.insertChild(0, field, sqlPars),
node.insertChild(1, field, sqlPars)),
'AND': _emitBinop,
'OR': _emitBinop,
'NOT': _emitUnop,
',': _emitEnum,
}
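# Illustrative output shapes for the emitters above (placeholder names made
# up): "1, 2, 3" flattens to "mag IN (%(..)s, %(..)s, %(..)s)", and
# "! 8 .. 12" to "NOT (mag BETWEEN %(..)s AND %(..)s)".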
class NumericIntervalFlattener(NumericFlattener):
"""A _Flattener for matching against min/max pairs of columns.
Here, field is such a (min_column, max_column) pair.
This currently isn't used anywhere in DaCHS itself. You can use
it in custom phraseMakers, though; cf. :samplerd:`obsform/q`
"""
def _emitEnum(self, node, field, sqlPars):
return base.joinOperatorExpr("OR",
[self._emitEquality(node, field, sqlPars, useChild=childInd)
for childInd, _ in enumerate(node.children)])
def _emitInterval(self, node, field, sqlPars):
# interval condition: overlaps
return ("{h1}>=%({l2})s AND %({h2})s>={l1}"
" AND {l1}<={h1} AND %({l2})s<=%({h2})s").format(
l1=field[0].name,
h1=field[1].name,
l2=node.insertChild(0, field[0], sqlPars),
h2=node.insertChild(1, field[1], sqlPars))
def _emitEquality(self, node, field, sqlPars, useChild=0):
# equality is interpreted as "contains" here
return "%({})s BETWEEN {} AND {}".format(
node.insertChild(useChild, field[0], sqlPars),
field[0].name,
field[1].name)
def _emitLarger(self, node, field, sqlPars):
# larger/larger equal is, perhaps a bit funkily, larger than
# upper/lower bound.
if "=" in node.operator:
limitField = field[1]
else:
limitField = field[0]
return "{} > %({})s".format(
limitField.name,
node.insertChild(0, limitField, sqlPars))
def _emitSmaller(self, node, field, sqlPars):
# smaller/smaller equal is, perhaps a bit funkily, smaller than
# lower/upper bound.
if "=" in node.operator:
limitField = field[0]
else:
limitField = field[1]
return "{} < %({})s".format(
limitField.name,
node.insertChild(0, limitField, sqlPars))
_standardOperators = set()
_sqlEmitters = {
'..': _emitInterval,
'=': _emitEquality,
'>=': _emitLarger,
'>': _emitLarger,
'<': _emitSmaller,
'<=': _emitSmaller,
'AND': NumericFlattener._emitBinop,
'OR': NumericFlattener._emitBinop,
'NOT': NumericFlattener._emitUnop,
',': _emitEnum,
}
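# Semantics sketch (illustrative): here, field is a (min_col, max_col) pair,
# so "a .. b" asks whether the interval [a, b] overlaps [min_col, max_col],
# "= x" asks for containment (x BETWEEN min_col AND max_col), and <, <=, >,
# >= compare against the lower or upper bound as chosen in
# _emitSmaller/_emitLarger above.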
class DateFlattener(_Flattener):
"""A _Flattener producing SQL for date-like columns.
This is supposed to work for timestamp-valued and MJD-valued columns
alike. The difference is handled on the level of nodes (DateNode
vs. MJDNode).
"""
def _expandDate(self, arg):
# returns the last second of arg if it looks like a day (i.e., hms=0)
# this is to fix postgres' behaviour when comparing timestamps and
# dates (e.g., 1990-01-01 < 1990-01-01T00:00:01).
if arg.hour==arg.minute==arg.second==0:
return arg.replace(hour=23,
minute=59, second=59)
return arg
def _emitBinop(self, node, field, sqlPars):
return base.joinOperatorExpr(node.operator,
[self.getSQLFor(c, field, sqlPars) for c in node.children])
def _emitUnop(self, node, field, sqlPars):
operand = self.getSQLFor(node.children[0], field, sqlPars)
if operand:
return "%s (%s)"%(node.operator, operand)
def _emitRange(self, node, field, sqlPars):
return "{} BETWEEN %({})s AND %({})s".format(
field.name,
node.insertChild(0, field, sqlPars),
node.insertChild(1, field, sqlPars))
def _emitEnum(self, node, field, sqlPars):
return base.joinOperatorExpr("OR",
[self.getSQLFor(node.__class__([child], "="), field, sqlPars)
for child in node.children])
def _emitSimple(self, node, field, sqlPars):
# return a simple comparison with the date itself
return "{} {} %({})s".format(field.name, node.operator,
node.insertChild(0, field, sqlPars))
def _emitEqual(self, node, field, sqlPars):
if node.children[0].hour==node.children[0].minute==0:
# expand this to a condition covering the whole day
return "{} BETWEEN %({})s AND %({})s".format(
field.name,
node.getSQLKey(field.name, node.children[0], sqlPars),
node.getSQLKey(field.name,
self._expandDate(node.children[0]), sqlPars))
else:
return self._emitSimple(node, field, sqlPars)
def _emitExpanded(self, node, field, sqlPars):
# return a simple comparison with date's midnight
node.children[0] = self._expandDate(node.children[0])
return "{} {} %({})s".format(
field.name,
node.operator,
node.insertChild(0, field, sqlPars))
_standardOperators = set()
_sqlEmitters = {
'..': _emitRange,
'AND': _emitBinop,
'OR': _emitBinop,
'NOT': _emitUnop,
',': _emitEnum,
"=": _emitEqual,
">=": _emitSimple,
"<=": _emitExpanded,
"<": _emitSimple,
">": _emitExpanded,
"<=": _emitExpanded,
}
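# Illustrative behaviour (hedged): for a plain date like "2010-05-03", "=" is
# widened by _emitEqual to cover the whole day, conceptually
#   dateCol BETWEEN 2010-05-03T00:00:00 AND 2010-05-03T23:59:59
# (the actual values go through placeholders), while "<= 2010-05-03" compares
# against the end of that day and ">= 2010-05-03" against its start.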
class StringFlattener(_Flattener):
"""A _Flattener turning String-like Vizier expressions into
SQL conditions against string-valued columns.
"""
_metaEscapes = {
"|": r"\|",
"*": r"\*",
"+": r"\+",
"(": r"\(",
")": r"\)",
"[": r"\[",
"%": r"\%",
"_": r"\_",
"\\\\": "\\\\",
}
_escapeRE = re.compile("[%s]"%"".join(list(_metaEscapes.keys())))
# The backslash in _metaEscapes is escaped to make _escapeRE work,
# but of course I need to replace the unescaped version.
_metaEscapes.update({"\\": "\\\\"})
def _escapeSpecials(self, aString):
"""returns aString with SQL RE metacharacters escaped.
"""
return self._escapeRE.sub(
lambda mat: self._metaEscapes[mat.group()],
aString)
def _makePattern(self, node, field, sqlPars):
parts = []
for child in node.children:
if isinstance(child, str):
parts.append(self._escapeSpecials(child))
else:
parts.append(self.getSQLFor(child, field, sqlPars))
return "^%s$"%("".join(parts))
_patOps = {
"~": "~*",
"=": "~",
"!~": "!~*",
"!": "!~",
"=~": "~*",
}
def _emitPatOp(self, node, field, sqlPars):
pattern = self._makePattern(node, field, sqlPars)
return "{} {} %({})s".format(
field.name,
self._patOps[node.operator],
node.getSQLKey(field.name, pattern, sqlPars))
def _emitEnum(self, node, field, sqlPars):
query = "{} IN ({})".format(
field.name,
", ".join(
"%({})s".format(node.insertChild(i, field, sqlPars))
for i in range(len(node.children))))
if node.operator=="!=,":
query = "NOT ({})".format(query)
return query
_translatedOps = {
"==": "=",
}
def _emitTranslatedOp(self, node, field, sqlPars):
return "{} = %({})s".format(field.name,
node.insertChild(0, field, sqlPars))
_nullOperators = {"*": ".*", "?": "."}
_standardOperators = set(["<", ">", "<=", ">=", "!="])
_sqlEmitters = {
"~": _emitPatOp,
"=": _emitPatOp,
"!~": _emitPatOp,
"!": _emitPatOp,
"=~": _emitPatOp, # this happens to work because of pattern escaping
"=,": _emitEnum,
"=|": _emitEnum,
"!=,": _emitEnum,
"==": _emitTranslatedOp,
}
def getSQLFor(self, node, field, sqlPars):
if node.operator=="[":
return "[%s]"%node.children[0]
if node.operator in self._nullOperators:
return self._nullOperators[node.operator]
else:
return _Flattener.getSQLFor(self, node, field, sqlPars)
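# Illustrative translation (assumed, with made-up column and key names): a
# pattern expression like "~andro*" becomes a case-insensitive regular
# expression match, conceptually
#   obj_name ~* %(key)s    with sqlPars[key] == "^andro.*$"
# where the literal parts have had their regexp metacharacters escaped by
# _escapeSpecials.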
def _makeFactory(parser, flattener):
def factory(field, val, sqlPars):
try:
tree = parser(val)
return flattener.getSQLFor(tree, field, sqlPars)
except ParseException:
raise base.ui.logOldExc(utils.ValidationError(
"Invalid input for type %s (see help for valid type literals)"%
field.type, field.name))
return factory
_dateFlattener = DateFlattener()
sqlmunge.registerSQLFactory("vexpr-float",
_makeFactory(parseNumericExpr, NumericFlattener()))
sqlmunge.registerSQLFactory("vexpr-date",
_makeFactory(parseDateExpr, _dateFlattener))
sqlmunge.registerSQLFactory("vexpr-mjd",
_makeFactory(parseDateExprToMJD, _dateFlattener))
sqlmunge.registerSQLFactory("vexpr-string",
_makeFactory(parseStringExpr, StringFlattener()))
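# End-to-end sketch (illustrative): for an input key of type vexpr-float,
# sqlmunge calls the factory registered above, which parses the user input
# with parseNumericExpr and flattens the tree with NumericFlattener, so that
# "8 .. 12 | > 20" ends up as roughly
#   (mag BETWEEN %(..)s AND %(..)s) OR (mag > %(..)s)
# with the operand values collected in sqlPars.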
class ToVexprConverter(typesystems.FromSQLConverter):
typeSystem = "vizierexpr"
simpleMap = {
"smallint": "vexpr-float",
"integer": "vexpr-float",
"int": "vexpr-float",
"bigint": "vexpr-float",
"real": "vexpr-float",
"float": "vexpr-float",
"double precision": "vexpr-float",
"double": "vexpr-float",
"text": "vexpr-string",
"unicode": "vexpr-string",
"char": "vexpr-string",
"date": "vexpr-date",
"timestamp": "vexpr-date",
"vexpr-date": "vexpr-date",
"vexpr-float": "vexpr-float",
"vexpr-string": "vexpr-string",
}
def mapComplex(self, sqlType, length):
if sqlType=="char":
return "vexpr-string"
if sqlType=="varchar":
return "vexpr-string"
getVexprFor = ToVexprConverter().convert
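# For illustration: getVexprFor maps database types to the vexpr input types
# above, e.g. getVexprFor("real") -> "vexpr-float" and
# getVexprFor("timestamp") -> "vexpr-date"; char and varchar go to
# "vexpr-string" via mapComplex.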
def makeConeSearchFor(inputKey):
"""returns an //scs#makeSpointCD condDesc tailored for inputKey.
"""
from gavo.svcs import standardcores
return base.parseFromString(standardcores.CondDesc, """
<FEED source="//scs#makeSpointCD"
tablehead=%s
matchColumn=%s/>
"""%(
utils.escapeAttrVal(inputKey.tablehead),
utils.escapeAttrVal(inputKey.name)))
def getPlaceholderFor(inputKey, values):
"""returns a placeholder (suggested input) for inputKey, where values
is the original values element.
This will currently be None unless we do a numeric or date-like input.
"""
if not values:
return
if inputKey.type=="vexpr-float":
scaling = inputKey.scaling or 1
return format_placeholder(values.min_typed, values.max_typed,
lambda val: "%s"%(val/scaling))
elif inputKey.type=="vexpr-mjd":
# date with underlying MJD column
return format_placeholder(values.min_typed, values.max_typed,
lambda val: "%s"%utils.formatISODT(stc.mjdToDateTime(val)))
elif inputKey.type=="vexpr-date":
# date with underlying timestamp column
return format_placeholder(values.min_typed, values.max_typed,
lambda val: "%s"%utils.formatISODT(val))
# fall through to None if no placeholder can be made
if __name__=="__main__": # pragma: no cover
import doctest
doctest.testmod()