"""
Description of columns (and I/O fields).
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
from gavo import adql
from gavo import base
from gavo import dm
from gavo import utils
from gavo.base import typesystems
from gavo.utils import codetricks
from gavo.votable import paramval
__docformat__ = "restructuredtext en"

# Database type names whose NULLs have to be declared explicitly when
# columns of these types are serialized into VOTables.  Array types are
# deliberately not considered here; they are another can of worms entirely.
EXPLICIT_NULL_TYPES = {
    "smallint", "integer", "bigint", "char", "boolean", "bytea"}
class TypeNameAttribute(base.AtomicAttribute):
    """An attribute whose values must be type names DaCHS understands.
    """

    @property
    def typeDesc_(self):
        # The list of known types is computed on access from the
        # converter's simpleMap.
        knownTypes = ", ".join(typesystems.ToPythonConverter.simpleMap)
        return ("a type name; the internal type system is similar to SQL's"
            " with some restrictions and extensions. The known atomic types"
            " include: %s"%knownTypes)

    def parse(self, value):
        """returns value unchanged if it can be translated to a VOTable type.

        Otherwise, a LiteralParseError is raised (the original exception
        is passed through base.ui.logOldExc).
        """
        try:
            # called only for its side effect of raising on unknown types
            typesystems.sqltypeToVOTable(value)
        except base.Error:
            parseError = base.LiteralParseError(self.name_, value,
                hint="A supported SQL type was expected here. If in doubt,"
                " check base/typeconversions.py, in particular ToPythonCodeConverter.")
            raise base.ui.logOldExc(parseError)
        else:
            return value

    def unparse(self, value):
        """returns value as-is; type names are their own literals.
        """
        return value
class ParamNameAttribute(base.UnicodeAttribute):
    """An attribute containing a param or column name.

    In DaCHS, such names have to match utils.identifierPattern
    (essentially, they look like python identifiers).  For compatibility
    with columns, the quoted/ prefix is accepted, too, but the only place
    where this is really supported is query generation.
    """

    @property
    def typeDesc_(self):
        template = ("A name for a table or service parameter. These have to match"
            " ``%s``."
            )
        return template%utils.identifierPattern.pattern

    def parse(self, value):
        """returns a name for the literal value.

        Literals starting with quoted/ become utils.QuotedName instances,
        literals matching identifierPattern are returned unchanged, and
        everything else raises a StructureError.
        """
        quotedPrefix = "quoted/"
        if value.startswith(quotedPrefix):
            return utils.QuotedName(value[len(quotedPrefix):])

        if utils.identifierPattern.match(value):
            return value

        raise base.StructureError(
            f"'{value}' is not a valid column identifier")
class ColumnNameAttribute(ParamNameAttribute):
    """An attribute containing a name suitable for SQL column names.

    Column names are special in that you can prefix them with "quoted/"
    and then get a delimited identifier.  This is something you probably
    shouldn't use.

    Using ADQL/SQL reserved words (without quoting) here yields a warning.
    """

    @property
    def typeDesc_(self):
        return ("a column name within an SQL table. These have to match the"
            " SQL regular_identifier production."
            " In a desperate pinch, you can generate delimited identifiers"
            " (that can contain anything) by prefixing the name with 'quoted/'")

    def parse(self, value):
        """returns the column name for the literal value.

        quoted/-prefixed literals yield utils.QuotedName instances
        without further checks.  Everything else is validated by
        ParamNameAttribute.parse; a warning is issued through base.ui if
        the upper-cased name is in adql.ALL_RESERVED_WORDS.
        """
        if value.startswith("quoted/"):
            return utils.QuotedName(value[7:])

        checked = ParamNameAttribute.parse(self, value)
        isReserved = checked.upper() in adql.ALL_RESERVED_WORDS
        if isReserved:
            base.ui.notifyWarning("Column name '%s' coincides with a"
                " ADQL/SQL reserved name. This may lead to serious trouble"
                " later. Please consider changing the name, or prepend quoted/"
                " to make it a delimited identifier."%checked)
        return checked

    def unparse(self, value):
        """returns a literal for value, restoring the quoted/ prefix
        for QuotedName instances.
        """
        return ("quoted/"+value.name
            if isinstance(value, utils.QuotedName)
            else value)
class _AttBox:
    """A wrapper marking values travelling between TableManagedAttributes.

    A TableManagedAttribute puts its value into an _AttBox before
    shipping it off in an event.  The receiving side can thus tell values
    coming from another TableManagedAttribute (which is fine) from values
    coming from an XML parser (which is forbidden).
    """

    def __init__(self, payload):
        # the wrapped value, stored untouched
        self.payload = payload
class TableManagedAttribute(base.AttributeDef):
    """An attribute not settable from XML for holding information
    managed by the parent table.

    That's stc and stcUtype here, currently.

    Do not use this in new code.  This should go when the stc element
    can safely be replaced by gavo-dm-based stuff (here: DmRoles).
    """
    typeDesc_ = "non-settable internally used value"

    def feed(self, ctx, instance, value):
        """accepts value only if it arrives wrapped in an _AttBox.

        Boxed values are the synthetic events produced by iterEvents
        during object copying; anything else is rejected with a
        StructureError.
        """
        if not isinstance(value, _AttBox):
            # do not let people set that stuff directly
            raise base.StructureError("Cannot set %s attributes from XML"%self.name_)

        # synthetic event during object copying, accept
        self.feedObject(instance, value.payload)

    def feedObject(self, instance, value):
        """stores value on instance under this attribute's name.
        """
        setattr(instance, self.name_, value)

    def iterEvents(self, instance):
        """yields a value event carrying the boxed value if it differs
        from the default.
        """
        current = getattr(instance, self.name_)
        if current!=self.default_:
            yield ("value", self.name_, _AttBox(current))

    def getCopy(self, instance, newParent, ctx):
        """returns the very object stored on instance.

        These values never get copied; they are potentially shared
        between many objects, so they must not be changed anyway.
        """
        return getattr(instance, self.name_)
class OldRoles:
    """A sentinel class for Table to signal non-adapted DM roles on a column
    or param.

    Wrapping an OldRoles instance again does not nest: the inner payload
    is taken over.  Instances are true exactly if their payload is.
    """

    def __init__(self, oldRoles):
        wrapped = oldRoles
        if isinstance(wrapped, OldRoles):
            # unwrap so we never hold an OldRoles inside an OldRoles
            wrapped = wrapped.oldRoles
        self.oldRoles = wrapped

    def __bool__(self):
        return True if self.oldRoles else False
class DMRolesAttribute(base.AttributeDef):
    """An attribute managing DM roles.

    It is not set directly from XML but filled when a table parses
    DM annotation.  When copying around columns between tables, this
    is used to build the new annotation; the value is an OldRoles
    instance rather than a list as usual until the new parent table has
    started constructing its own DM annotations.
    """
    typeDesc_ = "read-only list of roles played by this column in DMs"

    def __init__(self, name, description="Undocumented"):
        base.AttributeDef.__init__(self,
            name,
            default=base.Computed,
            description=description,
            copyable=True)

    @property
    def default_(self):
        # a fresh list on every access so instances never share a default
        return []

    def feedObject(self, instance, value):
        """stores value on instance under this attribute's name.
        """
        setattr(instance, self.name_, value)

    def iterEvents(self, instance):
        """yields nothing: roles are entirely externally managed.
        """
        yield from ()

    def getCopy(self, instance, newParent, ctx):
        """returns the previous roles wrapped into OldRoles, or an empty
        list if there were none.

        The wrapper prevents accidental changes and lets the new parent
        table figure out that the roles haven't been updated.
        """
        previous = getattr(instance, self.name_)
        return OldRoles(previous) if previous else []
class RoEmptyDict(dict):
    """is a read-only standin for a dict.

    It's hashable, though, since it's always empty...  This is used here
    for a default for displayHint.

    Plain dict subclasses are *not* hashable, so __hash__ is provided
    explicitly.  All adding mutators (item assignment, update, setdefault,
    and in-place union) raise a TypeError; otherwise the shared default
    instance could be filled by accident through one of them.
    """

    def __setitem__(self, what, where):
        raise TypeError("RoEmptyDicts are immutable")

    def update(self, *args, **kwargs):
        raise TypeError("RoEmptyDicts are immutable")

    def setdefault(self, key, default=None):
        raise TypeError("RoEmptyDicts are immutable")

    def __ior__(self, other):
        raise TypeError("RoEmptyDicts are immutable")

    def __hash__(self):
        # All instances are empty and hence compare equal, so a constant
        # hash is consistent with __eq__.
        return hash(())


_roEmptyDict = RoEmptyDict()
class DisplayHintAttribute(base.AtomicAttribute):
    """is a display hint.

    Display hint literals are comma-separated key=value sequences.
    Keys are up to the application and evaluated by htmltable, votable, etc.

    The parsed values are simply dictionaries mapping strings to strings, i.e.,
    value validation cannot be performed here (yet -- do we want this?
    A central repository of display hints would be kinda useful...)
    """
    typeDesc_ = "Display hint"

    def __init__(self, name, description, **kwargs):
        base.AtomicAttribute.__init__(self,
            name,
            default=_roEmptyDict,
            description=description,
            **kwargs)

    def parse(self, value):
        """returns a dictionary for a key=value{,key=value} literal.

        Literals consisting of whitespace only yield the shared empty
        read-only dict.  Items that do not split into exactly one key and
        one value at "=" cause a LiteralParseError.
        """
        if not value.strip():
            return _roEmptyDict

        try:
            pairs = [item.split("=") for item in value.split(",")]
            return dict(pairs)
        except (ValueError, TypeError):
            raise base.ui.logOldExc(base.LiteralParseError(self.name_, value,
                hint="DisplayHints have a format like tag=value{,tag=value}"))

    def unparse(self, value):
        """returns the literal form of the display hint dictionary value.
        """
        return ",".join(
            "%s=%s"%(key, hint) for key, hint in value.items())
class Option(base.Structure):
    """A value for enumerated columns.

    For presentation purposes, an option can have a title, defaulting to
    the option's value.
    """
    name_ = "option"

    _title = base.UnicodeAttribute("title", default=base.NotGiven,
        description="A Label for presentation purposes; defaults to val.",
        copyable=True)
    _val = base.DataContent(copyable=True, description="The value of"
        " the option; this is what is used in, e.g., queries and the like.")

    def __repr__(self):
        # may occur in user messages from formal, so we use title.
        # __repr__ must return a str; before completeElement has run, title
        # may still be NotGiven, in which case we fall back to the content
        # (which is what completeElement would use as title, too).
        label = self.title
        if label is base.NotGiven:
            label = self.content_
        return str(label)

    def __str__(self):
        return repr(self)

    def completeElement(self, ctx):
        """defaults title to the stringified content if it was not given.
        """
        if self.title is base.NotGiven:
            self.title = str(self.content_)
        super().completeElement(ctx)
def makeOptions(*args):
    """returns a list of Option instances, one per positional argument.

    Each argument becomes the content of its Option, in the order given.
    """
    options = []
    for optionValue in args:
        options.append(base.makeStruct(Option, content_=optionValue))
    return options
class Values(base.Structure):
    """Information on a column's values, in particular its domain.

    This is quite like the values element in a VOTable.  In particular,
    to accommodate VOTable usage, we require nullLiteral to be a valid literal
    for the parent's type.

    Note that DaCHS does not validate for constraints from values on
    table import.  This is mainly because before ``dachs limits`` has run,
    values may not represent the new dataset in semiautomatic values.

    With HTTP parameters, values validation does take place (but again,
    that's mostly not too helpful because there are query languages
    sitting in between most of the time).

    Hence, the main utility of values is metadata declaration, both
    in the form renderer (where they become placeholders) and in datalink
    (where they are communicated as VOTable values).
    """
    name_ = "values"

    _min = base.UnicodeAttribute("min", default=None,
        description="Minimum acceptable"
        " value as a datatype literal", copyable=True)
    _max = base.UnicodeAttribute("max", default=None,
        description="Maximum acceptable"
        " value as a datatype literal", copyable=True)
    _options = base.StructListAttribute("options",
        childFactory=Option,
        description="List of acceptable values (if set)", copyable=True)
    _default = base.UnicodeAttribute("default", default=None,
        description="A default"
        " value (currently only used for options).", copyable=True)
    _nullLiteral = base.UnicodeAttribute("nullLiteral", default=None,
        description=
        "An appropriate value representing a NULL for this column in VOTables"
        " and similar places. You usually should only set it for integer"
        " types and chars. Note that rowmakers make no use of this nullLiteral,"
        " i.e., you can and should choose null values independently of"
        " your source. Again, for reals, floats and (mostly) text you probably"
        " do not want to do this.", copyable=True)
    _multiOk = base.BooleanAttribute("multiOk", False, "Deprecated, use"
        " multiplicity=multiple on input keys instead.", copyable=True)
    _fromDB = base.ActionAttribute("fromdb", "_evaluateFromDB", description=
        "A query fragment returning just one column to fill options from (will"
        " add to options if some are given). Do not write SELECT or anything,"
        " just the column name and the where clause. Do not do this for"
        " large tables even if there are reasonably few values, because"
        " there is no good way to speed up this kind of query using indices.")
    _caseless = base.BooleanAttribute("caseless",
        description="When validating, ignore the case of string values."
        " For non-string types, behaviour is undefined (i.e., DaCHS is"
        " going to spit on you).",
        default=False,
        copyable=True)
    _percentile03 = base.UnicodeAttribute("percentile03",
        default=None,
        description="Value at the 3rd percentile of the distribution of this"
        " column.",
        copyable=True)
    _percentile97 = base.UnicodeAttribute("percentile97",
        default=None,
        description="Value at the 97rd percentile of the distribution of this"
        " column.",
        copyable=True)
    _median = base.UnicodeAttribute("median",
        default=None,
        description="Median of the distribution of this column.",
        copyable=True)
    _fillFactor = base.UnicodeAttribute("fillFactor",
        default=None,
        description="Ratio of non-NULL values to the number of rows in the"
        " embedding table.",
        copyable=True)
    _original = base.OriginalAttribute()

    # set of python values of the options (lowercased if caseless); None
    # as long as there are no options or onParentComplete has not run.
    validValues = None

    @classmethod
    def fromOptions(cls, labels):
        """returns Values with the elements of labels as valid options.
        """
        return base.makeStruct(cls, options=makeOptions(*labels))

    @property
    def min_typed(self):
        """the min literal as a python value of the parent's (scalarified)
        type; computed on first access and cached on the instance.
        """
        if not hasattr(self, "_min_type_cache"):
            self._min_type_cache = self.makePythonVal(
                self.min, typesystems.scalarify(self.parent.type))
        return self._min_type_cache

    @property
    def max_typed(self):
        """the max literal as a python value of the parent's (scalarified)
        type; computed on first access and cached on the instance.
        """
        if not hasattr(self, "_max_type_cache"):
            self._max_type_cache = self.makePythonVal(
                self.max, typesystems.scalarify(self.parent.type))
        return self._max_type_cache

    def makePythonVal(self, literal, sqltype):
        """returns literal converted with the python converter for sqltype.

        None is passed through as None.
        """
        if literal is None:
            return None
        return typesystems.sqltypeToPython(sqltype)(literal)

    def _evaluateFromDB(self, ctx):
        """adds an option for each distinct non-NULL result of the fromdb
        query fragment.

        Nothing happens if ctx has a false doQueries attribute.  Database
        errors only produce a warning.
        """
        if not getattr(ctx, "doQueries", True):
            return
        try:
            with base.getTableConn() as conn:
                # fromdb comes from the resource descriptor; the query is
                # assembled by plain string interpolation.
                for row in conn.query(self.parent.parent.expand(
                        "SELECT DISTINCT %s"%(self.fromdb))):
                    if row[0] is None:
                        # a NULL here would be about the same as required=False;
                        # dealing with it properly in constraints is hard.  Let's
                        # see how well we get away with ignoring it.
                        continue
                    self._options.feedObject(self, base.makeStruct(Option,
                        content_=row[0]))
        except base.DBError: # Table probably doesn't exist yet, ignore.
            base.ui.notifyWarning("Values fromdb '%s' failed, ignoring"%self.fromdb)

    def onParentComplete(self):
        """converts options, the nullLiteral, and the default from
        string literals to python values.

        The nullLiteral is only checked for being parseable; a
        LiteralParseError is raised if it is not.
        """
        dataField = self.parent

        if self.options:
            dbt = dataField.type
            for opt in self.options:
                opt.content_ = self.makePythonVal(opt.content_, dbt)
            self.validValues = set(o.content_ for o in self.options)
            if self.caseless:
                self.validValues = set(o and o.lower() for o in self.validValues)

        if self.nullLiteral:
            try:
                self.makePythonVal(self.nullLiteral, dataField.type)
            except ValueError:
                raise base.LiteralParseError("nullLiteral", self.nullLiteral,
                    hint="If you want to *parse* whatever you gave into a NULL,"
                    " use the parseWithNull function in a rowmaker. The null"
                    " literal gives what value will be used for null values"
                    " when serializing to VOTables and the like.")

        if self.default and isinstance(self.default, str):
            votType, arraysize, xtype = dataField._getVOTableType()
            self.default = paramval.getVOTParser(votType, arraysize, xtype)(
                self.default)

    def setFromStatRow(self, statRow, ctx):
        """fills statistics data from one of our statistics rows.

        This will not overwrite values already given to let people
        manually give statistics.
        """
        for srcKey, destKey in [
                ("min_value", "min"),
                ("max_value", "max"),
                ("percentile03", "percentile03"),
                ("percentile97", "percentile97"),
                ("median", "median"),
                ("fill_factor", "fillFactor"),
                ]:
            if srcKey in statRow:
                if (getattr(self, destKey, None) is None
                        and statRow[srcKey] is not None):
                    self.feed(destKey, str(statRow[srcKey]), ctx)

    def setOptionsFromDict(self, dist, ctx):
        """sets Options from the keys of the dict dist.

        This is primarily for injection of values from the DB into the RD.
        It will hence not change anything if there are already options
        defined (which are presumably manually set).
        """
        if self.options:
            return

        self.options = makeOptions(*dist.keys())

    def validateOptions(self, value):
        """returns False if value is not among the options or, for lists,
        tuples and sets, if one of its non-null items is not.

        Without options, everything validates.  Various null values always
        validate here; non-null checking is done by the column on its
        required attribute.

        With caseless, strings (and the items of sequences) are lowercased
        before the comparison.

        An AssertionError is raised for the literal string 'None', as that
        indicates a stringified None leaked into the value.
        """
        if value=="None":
            # explicit raise rather than an assert statement so the guard
            # also works when running with python -O.
            raise AssertionError(
                "Literal 'None' passed as a value to validateOptions")

        if self.validValues is None:
            return True

        isSequence = isinstance(value, (list, tuple, set))

        if self.caseless and value:
            if isSequence:
                # lowercase item-wise; sequences have no lower() themselves.
                value = [item and item.lower() for item in value]
            else:
                value = value.lower()

        if isSequence:
            for val in value:
                if val and val not in self.validValues:
                    return False
            return True

        return value in self.validValues or value is None
class ColumnBase(base.Structure, base.MetaMixin):
    """A base class for columns, parameters, output fields, etc.

    Actually, right now there's far too much cruft in here that
    should go into Column proper or still somewhere else.  Hence:

    XXX TODO: Refactor.

    See also Column for a docstring that still applies to all we've in
    here.
    """
    _name = ParamNameAttribute("name", default=base.Undefined,
        description="Name of the param",
        copyable=True, before="type")
    _type = TypeNameAttribute("type", default="real", description=
        "datatype for the column (SQL-like type system)",
        copyable=True, before="unit")
    _unit = base.UnicodeAttribute("unit", default="", description=
        "Unit of the values. Use VOUnits syntax and use single quotes when"
        " you use custom units (you should avoid that).",
        copyable=True, before="ucd", strip=True)
    _ucd = base.UnicodeAttribute("ucd", default="", description=
        "UCD of the column", copyable=True, before="description")
    _description = base.NWUnicodeAttribute("description",
        default="", copyable=True,
        description="A short (one-line) description of the values in this column.")
    _tablehead = base.UnicodeAttribute("tablehead", default=None,
        description="Terse phrase to put into table headers for this"
        " column", copyable=True)
    _utype = base.UnicodeAttribute("utype", default=None, description=
        "utype for this column", copyable=True)
    _required = base.BooleanAttribute("required", default=False,
        description="Record becomes invalid when this column is NULL",
        copyable=True)
    _displayHint = DisplayHintAttribute("displayHint",
        description="Suggested presentation; the format is "
        " <kw>=<value>{,<kw>=<value>}, where what is interpreted depends"
        " on the output format. See, e.g., documentation on HTML renderers"
        " and the formatter child of outputFields.", copyable=True)
    _verbLevel = base.IntAttribute("verbLevel", default=20,
        description="Minimal verbosity level at which to include this column",
        copyable=True)
    _values = base.StructAttribute("values", default=None,
        childFactory=Values, description="Specification of legal values",
        copyable=True)
    _fixup = base.UnicodeAttribute("fixup", description=
        "A python expression the value of which will replace this column's"
        " value on database reads. Write a ___ to access the original"
        ' value. You can use macros for the embedding table.'
        ' This is for, e.g., simple URL generation'
        ' (fixup="\'\\internallink{/this/svc}\'+___").'
        ' It will *only* kick in when tuples are deserialized from the'
        " database, i.e., *not* for values taken from tables in memory.",
        default=None, copyable=True)
    _note = base.UnicodeAttribute("note", description="Reference to a note meta"
        " on this table explaining more about this column", default=None,
        copyable=True)
    _xtype = base.UnicodeAttribute("xtype", description="VOTable xtype giving"
        " the serialization form; you usually do *not* want to set this,"
        " as the xtypes actually used are computed from database type."
        " DaCHS xtypes are only used for a few unsavoury, hopefully temporary,"
        " hacks", default=None, copyable=True)
    _stc = TableManagedAttribute("stc", description="Internally used"
        " STC information for this column (do not assign to unless instructed"
        " to do so)",
        default=None, copyable=True)
    _stcUtype = TableManagedAttribute("stcUtype", description="Internally used"
        " STC information for this column (do not assign to)",
        default=None, copyable=True)
    _dmRoles = DMRolesAttribute("dmRoles",
        description="Roles played by this column; cannot be assigned to.")
    _properties = base.PropertyAttribute(copyable=True)
    _original = base.OriginalAttribute()

    # set from the parse context in completeElement; when true, validate
    # refuses fixups.
    restrictedMode = False

    def __repr__(self):
        return "<Column %s>"%repr(self.name)

    def setParent(self, parent):
        # overridden to turn off automatic meta parenting from MetaMixin.
        # This does not upcall on purpose.
        self.parent = parent

    def onParentComplete(self):
        """replaces a note reference given as a string by what the
        parent's getNote returns for it.
        """
        # we need to resolve note on construction since columns are routinely
        # copied to other tables and meta info does not necessarily follow.
        if isinstance(self.note, str):
            try:
                self.note = self.parent.getNote(self.note)
            except base.NotFoundError: # non-existing notes silently ignored
                self.note = None

    def completeElement(self, ctx):
        """picks up the restricted flag from ctx and computes key from name.
        """
        self.restrictedMode = getattr(ctx, "restricted", False)
        if isinstance(self.name, utils.QuotedName):
            self.key = self.name.name
            if ')' in self.key:
                # No '()' allowed in key for that breaks the %()s syntax (sigh!).
                # Work around with the following quick hack that would break
                # if people carefully chose proper names.  Anyone using delim.
                # ids in SQL deserves a good spanking anyway.
                self.key = self.key.replace(')', "__").replace('(', "__")
        else:
            self.key = self.name
        super().completeElement(ctx)

    def isEnumerated(self):
        """returns something true if there are values with options.
        """
        return self.values and self.values.options

    def validate(self):
        """raises a RestrictedElement for fixups in restricted mode and
        warns about the obsolete adql:TIMESTAMP xtype.
        """
        super().validate()
        if self.restrictedMode and self.fixup:
            raise base.RestrictedElement("fixup")
        if self.xtype=="adql:TIMESTAMP":
            base.ui.notifyWarning("At %s: Do not use adql:TIMESTAMP xtype any more."
                " Simply drop xtype for timestamp and date-typed columns."%(
                self.getSourcePosition()))

    def validateValue(self, value):
        """raises a ValidationError if value does not match the constraints
        given here.

        None only fails when the column is required.  Options and min/max
        from values are only checked if self is not a (database) Column.
        """
        if value is None:
            if self.required:
                raise base.ValidationError(
                    "Field %s is empty but non-optional"%self.name, self.name)
            return

        # Only validate these if we're not a database column
        if not isinstance(self, Column):
            vals = self.values
            if vals:
                if vals.options:
                    if value and not vals.validateOptions(value):
                        raise base.ValidationError("Value %s not consistent with"
                            " legal values %s"%(value, vals.options), self.name)
                else:
                    if vals.min and value<vals.min_typed:
                        raise base.ValidationError("%s too small (must be at least %s)"%(
                            value, vals.min), self.name)
                    if vals.max and value>vals.max_typed:
                        raise base.ValidationError("%s too large (must be less than %s)"%(
                            value, vals.max), self.name)

    def isIndexed(self):
        """returns a set of index kinds if this column is indexed, False if
        it isn't or None if we don't know because we don't have a proper parent.

        In other words: it's a true value if the thing is indexed, a false
        one if it isn't.  All further refinements are probably only there
        for more informed ADQL query morphing.
        """
        if self.parent and hasattr(self.parent, "indexedColumns"):
            # parent is something like a TableDef
            return self.parent.indexedColumns.get(self.name, False)

    def isPrimary(self):
        """returns a guess as to whether this column is a primary key of the
        embedding table.

        This may return True, False, or None (unknown).
        """
        if self.parent and hasattr(self.parent, "primary"):
            # parent is something like a TableDef
            if self.name in self.parent.primary:
                return True
            else:
                return False

    def isScalar(self):
        """returns true if we consider the column's values as scalar.

        A single string is scalar for us.
        """
        votType, arraysize, _ = self._getVOTableType()
        if votType=="char" or votType=="unicodeChar":
            # strings are scalar unless there is more than one dimension;
            # a missing arraysize would mean a single char, which is
            # scalar, too.
            return arraysize is None or "x" not in arraysize
        else:
            return arraysize is None

    _indexedCleartext = {
        True: "indexed",
        False: "notIndexed",
        None: "unknown",
    }

    def asInfoDict(self):
        """returns a dictionary of certain, "user-interesting" properties
        of the data field, in a dict of strings.

        indexState is "unknown" if isIndexed cannot tell (i.e., returns
        None), "indexed" or "notIndexed" otherwise.
        """
        indexed = self.isIndexed()
        # keep None apart from False; bool(None) would turn "don't know"
        # into "notIndexed".
        indexKey = None if indexed is None else bool(indexed)

        return {
            "name": str(self.name),
            "type": self.type,
            "description": self.description or "N/A",
            "tablehead": self.getLabel(),
            "unit": self.unit or "N/A",
            "ucd": self.ucd or "N/A",
            "verbLevel": self.verbLevel,
            "indexState": self._indexedCleartext[indexKey],
            "note": self.note,
        }

    def getDDL(self):
        """returns an SQL fragment describing this column ready for
        inclusion in a DDL statement.
        """
        pgType = base.sqltypeToPG(self.type)

        # The "str" does magic for delimited identifiers, so it's important.
        items = [str(self.name), pgType]
        if self.required:
            items.append("NOT NULL")
        return " ".join(items)

    def getDisplayHintAsString(self):
        """returns the display hint in its literal (unparsed) form.
        """
        return self._displayHint.unparse(self.displayHint)

    def getLabel(self):
        """returns a short label for this column.

        The label is either the tablehead or, missing it, the capitalized
        column key.
        """
        if self.tablehead is not None:
            return self.tablehead
        return str(self.key).capitalize()

    def _getVOTableType(self):
        """returns the VOTable type, arraysize and xtype for this
        column-like thing.

        For the date type, the xtype is forced to dachs:DATE.
        """
        votType, arraysize, xtype = base.sqltypeToVOTable(self.type)

        if self.type=="date":
            xtype = "dachs:DATE"

        return votType, arraysize, xtype
class Column(ColumnBase):
    """A database column.

    Columns contain almost all metadata to describe a column in a database
    table or a VOTable (the exceptions are for column properties that may
    span several columns, most notably indices).

    Note that the type system adopted by the DC software is a subset
    of postgres' type system.  Thus when defining types, you have to
    specify basically SQL types.  Types for other type systems (like
    VOTable, XSD, or the software-internal representation in python values)
    are inferred from them.

    Columns can have delimited identifiers as names.  Don't do this, it's
    no end of trouble.  For this reason, however, you should not use name
    but rather key to programmatically obtain field's values from rows.

    Properties evaluated:

    - std -- set to 1 to tell the tap schema importer to have the column's
      std column in TAP_SCHEMA 1 (it's 0 otherwise).
    - statisticsTarget -- an integer to be set as this column's
      statistics-gathering target.  Set this to something between 100 and
      10000 on postgres if you have large tables and columns with strongly
      non-uniform distributions.  Set to -1 to revert to the system default.
      gavo imp -m will apply changes here; you'll manually have to run
      ``analyze <tablename>`` after that.
    - statistics -- set this to "no" to keep DaCHS from using this column
      in dachs limits.  Set this to "enumerate" to make DaCHS collect
      the discrete values allowed (currently only supported for strings).
    - targetType -- for a column containing a URL, the media type of the
      resource pointed at.  This is for producing extra annotation for
      Aladin and friends as per
      http://mail.ivoa.net/pipermail/dal/2018-May/008017.html
    - targetTitle -- if you give targetType, use this to set the link
      title (defaults to "Link").
    """
    name_ = "column"

    _name = ColumnNameAttribute("name", default=base.Undefined,
        description="Name of the column",
        copyable=True, before="type")
    _hidden = base.BooleanAttribute("hidden", default=False,
        description="Hide the column from most of the user interface"
        " (specifically, you can't use it in TAP queries or results,"
        " and it won't be in TAP_SCHEMA). You typically want this for"
        " internal, administrative columns.", copyable=True)
    _ignored = base.DataContent(description="Columns admit data"
        " content but ignore it. This is exclusively a convenience"
        " for building columns from params and should not be used for"
        " anything else.", copyable=False)

    def validate(self):
        """warns if the column could be NULL but, having a type from
        EXPLICIT_NULL_TYPES, lacks a nullLiteral for VOTable serialization.

        The warning is only issued if a parse context with a position can
        be found on the stack.
        """
        super().validate()

        # Only types needing explicit nulls in VOTables are of interest...
        if self.type not in EXPLICIT_NULL_TYPES:
            return
        # ...and only if NULLs may actually turn up...
        if self.required:
            return
        # ...without a null literal having been declared.
        if self.values and self.values.nullLiteral:
            return

        try:
            pos = codetricks.stealVar("context").pos
            base.ui.notifyWarning("Somewhere near %s: "
                " Column %s may be null but has no explicit"
                " null value."%(pos, self.name))
        except (ValueError, AttributeError):
            # This is stealVar's ValueError, we have no context in stack (or
            # it's a context var not from our parsing code).
            # Seems we're not parsing from a file, so the user probably
            # can't help it anyway.  Don't complain.
            pass

    def getAnnotation(self, roleName, container, instance):
        """returns a dm.ColumnAnnotation for this column in roleName.

        A ReportableError is raised if container is not this column's
        parent.
        """
        if not (self.parent==container):
            raise base.ReportableError("You cannot use columns from"
                " other tables in your DM annotations directly.",
                hint="If you really need something like this, you need to"
                " define a datatype corresponding to what's in the other table"
                " and reference a corresponding dm declaration.")

        return dm.ColumnAnnotation(roleName, self, instance)

    def updateFromContext(self, tableId, columnName, ctx):
        """pulls the injected column metadata from the context into our values.

        This is being called from table's completeElement -- only then
        is everything we depend on reasonably complete, and table
        knows better if we should even bother.
        """
        statsKey = f"colstats:{tableId}:{columnName}"
        statRow = ctx.getInjected(statsKey, None)
        discDist = ctx.getInjected(
            f"discrete-strings:{tableId}:{columnName}", None)

        if not (statRow or discDist):
            return

        # make sure there is a values child to receive the metadata
        if not self.values:
            self.values = base.makeStruct(Values)

        if statRow:
            self.values.setFromStatRow(statRow, ctx)
        if discDist:
            self.values.setOptionsFromDict(discDist, ctx)

    def getStatistics(self):
        """returns a dictionary with COLSTATS (or later VS.column) keys
        containing any statistic information in this column's values.

        Values are blindly stringified right now; we may have to switch to
        using VOTable tabledata serialisation as we allow column statistics
        on items that have non-trivial serialisations.
        """
        if not self.values:
            return {}

        nameMap = [
            ("min", "minValue"),
            ("max", "maxValue"),
            ("median", "median"),
            ("percentile03", "percentile03"),
            ("percentile97", "percentile97"),
            ("fillFactor", "fillFactor"),]
        found = (
            (colstatsName, getattr(self.values, attName))
            for attName, colstatsName in nameMap)

        # None and the empty string both count as "no statistics here"
        return {
            colstatsName: stat
            for colstatsName, stat in found
            if stat is not None and stat!=""}
[docs]class ParamBase(ColumnBase):
"""A basic parameter.
This is the base for both Param and InputKey.
"""
_value = base.DataContent(description="The value of parameter."
" It is parsed according to the param's type using the default"
" parser for the type VOTable tabledata.", default=base.NotGiven,
copyable=True, expand=True)
_valueCache = base.Undefined
__contentStore = base.NotGiven
nullLiteral = ""
unprocessedTypes = set(["raw", "file"])
def __repr__(self):
try:
contentSer = repr(self.content_)
except:
contentSer = "<not serialisable>"
return f"<{self.__class__.__name__}, {self.name}={contentSer}>"
def __set_content(self, val):
self.__contentStore = val
def __get_content(self):
if (self.__contentStore is base.NotGiven
and self._valueCache is not base.Undefined):
self.__contentStore = self._unparse(self._valueCache)
return self.__contentStore
content_ = property(__get_content, __set_content)
[docs] def expand(self, value):
"""hands up macro expansion requests to a parent, if there is one
and it can handle expansions.
"""
if hasattr(self.parent, "expand"):
return self.parent.expand(value)
return value
[docs] def completeElement(self, ctx):
if not self.values:
self.values = base.makeStruct(Values, parent_=self)
super().completeElement(ctx)
[docs] def onElementComplete(self):
super().onElementComplete()
if self.content_ is base.NotGiven:
if self.values.default is not None:
self.set(self.values.default)
else:
self.set(self.content_)
@property
def value(self):
"""returns a typed value for the parameter.
Unset items give None here.
"""
if self._valueCache is base.Undefined:
if self.content_ is base.NotGiven:
self._valueCache = None
else:
self._valueCache = self._parse(self.content_)
return self._valueCache
[docs] def getStringValue(self):
"""returns a string serialisation of the value.
This is what would reproduce the value if embedded in an XML
serialisation of the param.
"""
if self.type in self.unprocessedTypes:
return "(Unrepresentable %s)"%self.type
return self.content_
[docs] def set(self, val):
"""sets this parameter's value.
val can be a python value, or string literal. In the second
case, this string literal will be preserved in string serializations
of this param.
If val is an invalid value for this item, a ValidationError is
raised and the item's value will be Undefined.
"""
if isinstance(val, str):
self.content_ = val
else:
self.content_ = base.NotGiven
self._valueCache = self._parse(val)
def _parse(self, literal, atom=False):
"""parses literal using the default value parser for this param's
type.
If literal is not a string, it will be returned unchanged.  The same
is true for any literal if this param's type is one of unprocessedTypes.
The literals "__NULL__" and "", as well as the nullLiteral of the
values child, yield None.  On params of type text or unicode,
"__EMPTY__" yields an empty string.  NaN results are turned into None,
too.
With atom=True, None is passed as arraysize when the parser is
obtained from paramval.getVOTParser.
The method also makes sure literal matches any constraints
set by a values child and raises a ValidationError if not.  A
ValidationError is also raised when a ValueError occurs while
parsing the literal.
As a side effect, content_ is set to the empty string when the resulting
value is None.
"""
# values of unprocessed types are handed through without any inspection
if self.type in self.unprocessedTypes:
return literal
# non-strings are taken to be parsed values already
elif not isinstance(literal, str):
return literal
# magic NULL literals
elif literal=="__NULL__" or literal=="":
value = None
# magic literal for an empty (as opposed to NULL) string
elif ((self.type=="text" or self.type=="unicode")
and literal=="__EMPTY__"):
value = ""
else:
# a null literal declared by the values child also means NULL
if literal==self.values.nullLiteral:
value = None
else:
try:
type, arraysize, xtype = self._getVOTableType()
if atom:
arraysize = None
# whitespace is only kept for the character types
if type not in ["char", "unicodeChar"]:
literal = literal.strip()
if self.xtype=="interval":
# prefer interval datatype over anything user-specified,
# and do special magic on xtype collision
if self.type=="timestamp":
type = "timestamp-interval"
else:
xtype = xtype or self.xtype
else:
xtype = self.xtype or xtype
value = paramval.getVOTParser(type, arraysize, xtype)(literal)
# make NaNs NULL here for consistent VOTable practice
# (value!=value is the NaN test: NaN does not compare equal to itself)
if value!=value:
value = None
except ValueError:
# note that literal may have been stripped above by now
raise base.ValidationError("%s is not a valid literal for %s"%(
repr(literal), self.name), self.name)
if not self.values.validateOptions(value):
# as long as we still have shitty "PQL" (old SIA, SSA and such):
# we need to skip pql-float and pql-int, as these can have
# sane Value ranges/enumerations but values like "4/5".
if self.type not in ["pql-int", "pql-float"]:
raise base.ValidationError("%s is not a valid value for %s"%(
repr(literal), self.name), self.name)
# unify NULL value representation to the empty string
if value is None:
self.content_ = ""
return value
def _unparse(self, value):
    """returns a string representation of value appropriate for this
    param's type.

    For the types only handled internally (those in unprocessedTypes,
    like file or raw), value is handed back as it is, i.e., what comes
    back is the python object rather than a string.  None is always
    serialised to an empty string.

    Everything else is left to the serializer paramval.getVOTSerializer
    returns for this param's VOTable type.

    Plus, right now, for sequences we're not doing anything.  We probably
    should; but we'll need to be much more careful in ContextGrammar then.
    """
    if self.type in self.unprocessedTypes:
        return value
    if value is None:
        return ""

    votType, arraysize, typeXtype = self._getVOTableType()

    # deal as gracefully as possible with xtype collisions: a manually
    # set xtype wins unless the type itself asks for an interval xtype
    # (or has no xtype at all).
    if typeXtype and "interval" not in typeXtype:
        chosenXtype = self.xtype or typeXtype
    else:
        chosenXtype = typeXtype or self.xtype

    serialize = paramval.getVOTSerializer(votType, arraysize, chosenXtype)
    return serialize(value)
class Param(ParamBase):
    """A table parameter.

    This is like a column, except that it conceptually applies to all
    rows in the table.  In VOTables, params will be rendered as
    PARAMs.

    While we validate the values passed using the DaCHS default parsers,
    at least the VOTable params will be literal copies of the string
    passed in.

    You can obtain a parsed value from the value attribute.

    Null value handling is a bit tricky with params.  An empty param (like
    ``<param name="x"/>``) is always NULL (None in python).
    In order to allow setting NULL even where syntactically something has
    to stand, we also turn any __NULL__ to None.

    For floats, NaN will also yield NULLs.  For integers, you can also
    use

        <param name="x" type="integer"><values nullLiteral="-1"/>-1</param>

    For arrays, floats, and strings, the interpretation of values is
    undefined.  Following VOTable practice, we do not tell empty strings and
    NULLs apart; for internal usage, there is a little hack: __EMPTY__ as literal
    does set an empty string.  This is to allow defaulting of empty strings -- in
    VOTables, these cannot be distinguished from "true" NULLs.
    """
    name_ = "param"

    _hidden = base.BooleanAttribute("hidden", default=False,
        description="Ignored on params, just present for constructor compatibility"
        " with column")

    def validate(self):
        """makes sure the param has a parseable value and, if it is
        required, a non-NULL one.

        A param without content is explicitly set to NULL first.

        Raises a LiteralParseError if obtaining the value fails with a
        ValueError, and a StructureError if the param is required but its
        value is None.
        """
        super().validate()
        if self.content_ is base.NotGiven:
            self.set(None)

        # Obtain the value exactly once and inside the guard: the value
        # property will bomb on malformed literals, and that must also
        # be reported properly when the param is required (the required
        # check below needs the value, too).
        try:
            value = self.value
        except ValueError as msg:
            raise base.LiteralParseError(self.name, self.content_,
                hint="Param content must be parseable as in VOTable TABLEDATA."
                " The value you passed caused the error: %s"%msg) from msg

        if self.required and value is None:
            raise base.StructureError("Required value not given for param"
                " %s"%self.name)

    def set(self, val):
        """sets the value of the parameter.

        Macros will be expanded if the parent object supports macro
        expansion (i.e., has an expand method); this is only attempted
        for string values containing a backslash.
        """
        if (isinstance(val, str)
                and "\\" in val
                and hasattr(self.parent, "expand")):
            val = self.parent.expand(val)
        return ParamBase.set(self, val)

    def getAnnotation(self, roleName, container, instance):
        """returns a dm annotation for this param (i.e., a paramRef).

        container is accepted for interface compatibility but not used
        here.
        """
        # TODO: Figure out how to make sure that this param actually
        # ends up in the target VOTable.  Or do we want to enforce
        # params to be within the table?
        return dm.ParamAnnotation(roleName, self, instance)

    def copy(self, parent, ctx=None):
        """returns a copy of this param that remembers the param it was
        copied from in its originalParam attribute.
        """
        # overridden so we can set originalParam, which in turn we need
        # in DM serialisation to VOTables
        newInstance = super().copy(parent, ctx)
        newInstance.originalParam = self
        return newInstance