"""
Functions taking strings and returning python values.
All of them accept None and return None for Nullvalue processing.
All of them leave values alone if they already have the right type.
This is usually used in conjunction with
base.typesystems.ToPythonCodeConverter.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import functools
import re
# let's depend on psycopg2 for now as regards ranges. We can always
# provide a thin shim here if we want to use different databases.
# Advise users to only use base.NumericRange, though.
from psycopg2.extras import NumericRange
from gavo import utils
from gavo.stc import parseSimpleSTCS
from gavo.utils import pgsphere
from gavo.utils import identity #noflake: exported name
from gavo.utils import (parseDefaultDatetime, #noflake: exported names
parseDefaultDate, parseDefaultTime)
@utils.document
def parseInt(literal):
    """returns an int from a literal, or None if literal is None or an empty
    string.

    Strings consisting of whitespace only count as empty.  Everything else
    is handed to int(), so values that already are ints come back unchanged.

    >>> parseInt("32")
    32
    >>> parseInt("")
    >>> parseInt(None)
    """
    if literal is None:
        return None
    if isinstance(literal, str) and literal.strip() == "":
        # blank strings are treated as null values rather than as errors
        return None
    return int(literal)
# Positive floating-point infinity.
# NOTE(review): nothing in the visible part of this module references this
# name; confirm whether it is still needed before removing it.
_inf = float("Inf")
@utils.document
def parseFloat(literal):
    """returns a float from a literal, or None if literal is None or an empty
    string.

    Strings consisting of whitespace only count as empty.  Everything else
    is handed to float(), which raises a ValueError for malformed strings.

    >>> parseFloat("   5e9 ")
    5000000000.0
    >>> parseFloat(None)
    >>> parseFloat("  ")
    >>> parseFloat("wobbadobba")
    Traceback (most recent call last):
    ValueError: could not convert string to float: 'wobbadobba'
    """
    isBlankString = isinstance(literal, str) and not literal.strip()
    if literal is None or isBlankString:
        return None
    return float(literal)
# Lowercased spellings parseBooleanLiteral accepts for True and False.
_trueLiterals = {"true", "yes", "t", "on", "enabled", "1"}
_falseLiterals = {"false", "no", "f", "off", "disabled", "0"}


@utils.document
def parseBooleanLiteral(literal):
    """returns a python boolean from some string.

    Boolean literals are strings like True, false, on, Off, yes, No in
    some capitalization.

    None and python booleans are returned unchanged.  Objects having an
    item attribute (this is meant for numpy booleans) are unwrapped by
    calling item().  Strings are lowercased (but not stripped) before they
    are looked up; unknown literals raise a ValueError.
    """
    if literal is None or isinstance(literal, bool):
        return literal

    if hasattr(literal, "item"):
        # numpy _bool
        return literal.item()

    normalized = literal.lower()
    if normalized in _trueLiterals:
        return True
    if normalized in _falseLiterals:
        return False
    raise ValueError(
        "'%s' is no recognized boolean literal."%normalized)
def parseUnicode(literal):
    """returns literal as a str, or None if literal is None.

    bytes are decoded as ASCII, with undecodable bytes replaced by the
    unicode replacement character; anything else is passed through str().
    """
    if literal is None:
        return None
    text = literal
    if isinstance(text, bytes):
        text = text.decode("ascii", "replace")
    return str(text)
def parseBytes(literal):
    r"""returns bytes from a literal.

    This will interpret hex and octal byte escapes, and it'll support lists
    of integer-like things; not sure if that's actually more harmful than good.
    But then people can always override the default behaviour.

    None yields None, and bytes are returned unchanged.  Strings must be
    ASCII-only.  Each escape is interpreted exactly once, i.e., the result
    of decoding one escape is never re-read as part of another escape.
    Anything that is not None, bytes, a list, or a str raises a ValueError.

    >>> parseBytes("abc")
    b'abc'
    >>> parseBytes(r"\xab\000")
    b'\xab\x00'
    >>> parseBytes(r"\x5c101")
    b'\\101'
    >>> parseBytes([123, 231, 23])
    b'{\xe7\x17'
    >>> parseBytes([10002])
    Traceback (most recent call last):
    ValueError: bytes must be in range(0, 256)
    """
    if literal is None:
        return None
    if isinstance(literal, bytes):
        return literal
    if isinstance(literal, list):
        return bytes(literal)
    if isinstance(literal, str):

        def decodeEscape(mat):
            # exactly one of the two groups has matched
            hexDigits, octDigits = mat.groups()
            if hexDigits is not None:
                return bytes([int(hexDigits, 16)])
            return bytes([int(octDigits, 8)])

        # Hex and octal escapes are handled in a single left-to-right pass.
        # Running two consecutive substitutions would let the output of the
        # first one (e.g., a backslash coming from \x5c) combine with
        # following digits into a bogus second escape.
        # The character classes are deliberately as permissive as they
        # always were, so malformed escapes like \xzz or \999 keep raising
        # ValueErrors from int() or bytes().
        return re.sub(
            br"\\(?:x([a-zA-Z0-9][a-zA-Z0-9])|(\d\d\d))",
            decodeEscape,
            literal.encode("ascii"))
    raise ValueError("No idea how to make bytes from %s"%repr(literal))
def parseCooPair(soup):
    """returns a pair of RA, DEC floats if they can be made out in soup
    or raises a value error.

    No range checking is done (yet), i.e., as long as two numbers can be
    made out, the function is happy.

    soup must be a string (it is stripped first; None is not accepted).
    Three notations are tried in turn: a pair of plain floats, a
    sexagesimal pair with blank-separated fields, and a sexagesimal pair
    with colon-separated fields.  In the sexagesimal forms, the first item
    is converted with utils.hmsToDeg, the second with utils.dmsToDeg.

    >>> parseCooPair("23 12")
    (23.0, 12.0)
    >>> parseCooPair("23.5,-12.25")
    (23.5, -12.25)
    >>> parseCooPair("3.75 -12.125")
    (3.75, -12.125)
    >>> parseCooPair("3 25,-12 30")
    (51.25, -12.5)
    >>> ["{:.9f}".format(v) for v in parseCooPair("12 15 30.5 +52 18 27.5")]
    ['183.877083333', '52.307638889']
    >>> parseCooPair("3.39 -12 39")
    Traceback (most recent call last):
    ValueError: Invalid time with sepChar None: '3.39'
    >>> parseCooPair("12 15 30.5 +52 18 27.5e")
    Traceback (most recent call last):
    ValueError: 12 15 30.5 +52 18 27.5e has no discernible position in it
    >>> parseCooPair("QSO2230+44.3")
    Traceback (most recent call last):
    ValueError: QSO2230+44.3 has no discernible position in it
    """
    soup = soup.strip()

    # All patterns below are raw strings: \s, \d and \. are not valid
    # string escapes, and non-raw literals containing them trigger
    # warnings on current python versions.

    def parseFloatPair(soup):
        # two floats separated by whitespace, a comma, or a slash
        mat = re.match(r"(%s)\s*[\s,/]\s*(%s)$"%(utils.floatRE,
            utils.floatRE), soup)
        if mat:
            return float(mat.group(1)), float(mat.group(2))

    def parseSexa(soup):
        # up to three blank-separated fields per coordinate
        timeangleRE = r"(?:\d+\s+)?(?:\d+\s+)?\d+(?:\.\d*)?"
        dmsRE = r"[+-]?\s*(?:\d+\s+)?(?:\d+\s+)?\d+(?:\.\d*)?"
        mat = re.match(r"(%s)\s*[\s,/]?\s*(%s)$"%(timeangleRE, dmsRE), soup)
        if mat:
            try:
                return utils.hmsToDeg(mat.group(1)), utils.dmsToDeg(
                    mat.group(2))
            except utils.Error as msg:
                # callers are promised ValueErrors for unparseable input
                raise utils.logOldExc(ValueError(str(msg)))

    def parseSexaColon(soup):
        # like parseSexa, but with colons between the fields
        timeangleRE = r"(?:\d+:)?(?:\d+:)?\d+(?:\.\d*)?"
        dmsRE = r"[+-]?\s*(?:\d+:)?(?:\d+:)?\d+(?:\.\d*)?"
        mat = re.match(r"(%s)\s*[\s,/]?\s*(%s)$"%(timeangleRE, dmsRE), soup)
        if mat:
            try:
                return (utils.hmsToDeg(mat.group(1), sepChar=":"),
                    utils.dmsToDeg(mat.group(2), sepChar=":"))
            except utils.Error as msg:
                raise utils.logOldExc(ValueError(str(msg)))

    # the first notation matching wins; helpers return None on no match
    for func in [parseFloatPair, parseSexa, parseSexaColon]:
        res = func(soup)
        if res:
            return res
    raise ValueError("%s has no discernible position in it"%soup)
def parseSPoint(soup):
    """returns an ``SPoint`` for a coordinate pair.

    The coordinate pair can be formatted in a variety of ways; see the
    `function parseCooPair`_.  Input is always in degrees.

    None and values that already are SPoints are returned unchanged.
    """
    if soup is None:
        return None
    if isinstance(soup, pgsphere.SPoint):
        return soup
    ra, dec = parseCooPair(soup)
    return pgsphere.SPoint.fromDegrees(ra, dec)
def parseInterval(soup):
    """tries to parse a numeric interval out of soup.

    Technically, we expect a space-separated pair of numeric somethings.
    If a part can be parsed as an int, it becomes an int, else it becomes
    a float.

    What's returned is a NumericRange object (currently from psycopg2, but
    you should only import NumericRange from base).

    Like the other parsers in this module, this returns None for None and
    leaves values alone that already are NumericRanges.

    Invalid literals raise some sort of ValueError.

    >>> parseInterval('3 4')
    NumericRange(3, 4, '[)')
    >>> parseInterval('3.5 4.75')
    NumericRange(3.5, 4.75, '[)')
    >>> parseInterval(None)
    >>> parseInterval('20')
    Traceback (most recent call last):
    ValueError: Not a valid numeric interval literal: '20'
    >>> parseInterval('gabba gubbu')
    Traceback (most recent call last):
    ValueError: could not convert string to float: 'gabba'
    """
    if soup is None or isinstance(soup, NumericRange):
        return soup

    parts = soup.split()
    if len(parts)!=2:
        raise ValueError("Not a valid numeric interval literal: %s"%repr(soup))

    def parseBound(literal):
        # prefer ints so integer intervals do not degrade to floats
        try:
            return int(literal)
        except ValueError:
            return float(literal)

    lower, upper = [parseBound(part) for part in parts]
    return NumericRange(lower, upper)
def originalOrIdentity(soup):
    """returns soup.original or soup if there is no original attribute.

    This is for cooperation with BinaryItem coming in from the web into
    ContextGrammars.
    """
    try:
        return soup.original
    except AttributeError:
        return soup
def _numericRangeFactory(colDesc):
    """A factory to serialise psycopg numeric ranges into VOTables.

    For column descriptions with xtype interval and a datatype of int,
    long, float, or double, this returns a mapper function; for everything
    else, it returns None.

    The mapper turns NumericRange instances into (lower, upper) 2-tuples.
    When the range includes its upper bound and the datatype is int or
    long, the upper bound is incremented by one.  None and anything that
    is not a NumericRange is left alone.

    (as of 1.2, DaCHS can't really read the result again properly)
    """
    if colDesc["xtype"]!="interval":
        return None
    if colDesc["datatype"] not in ["int", "long", "float", "double"]:
        return None

    def mapper(val):
        if val is None or not isinstance(val, NumericRange):
            return val
        bumpUpper = (val.upper_inc
            and colDesc["datatype"] in ["int", "long"])
        return (val.lower, val.upper+1 if bumpUpper else val.upper)

    return mapper
# Hand the factory to utils.registerDefaultMF at import time.
# NOTE(review): presumably this adds it to the default mapper factory
# registry used when serialising values -- confirm in gavo.utils.
utils.registerDefaultMF(_numericRangeFactory)
@functools.lru_cache(1)
def getDefaultValueParsers():
    """returns a dict containing all exported names from this module.

    Exported names are the ones listed in __all__; they are looked up in
    the module globals.

    This is useful with typesystems.ToPythonCodeConverter; see
    rscdef.column.Parameter for an example.

    This is always the same dict; thus, if you change it, copy it first.
    """
    exportedNames = set(__all__)
    return {name: value
        for name, value in globals().items()
        if name in exportedNames}
def _test():
    """runs doctest.testmod() with its default arguments."""
    from doctest import testmod
    testmod()
# Run the doctests when this file is executed as a script.  Note that this
# happens before __all__ is bound below.
if __name__=="__main__":
    _test()

# The public names of this module; getDefaultValueParsers() returns exactly
# the module globals listed here.
__all__ = ["parseInt", "parseFloat", "parseBooleanLiteral", "parseUnicode",
    "parseDefaultDate", "parseDefaultTime", "parseDefaultDatetime",
    "parseCooPair", "getDefaultValueParsers", "parseSPoint", "parseSimpleSTCS",
    "NumericRange", "originalOrIdentity", "parseBytes"]