Conversions between type systems.

The DC software has to deal with quite a few type systems (see
base.typesystems). In general, we keep metadata in the SQL type system;
in particular, the type attributes of columns and params take values in it.

In fact, we use a couple of extensions:

	- file -- this corresponds to a file upload from the web (i.e., a pair
		(filename, file object)).  It would be conceivable to turn this into
		blobs at some point, but right now we simply don't touch it.
	- vexpr-float, -text, -date, -mjd -- vizier-like expressions coming in from
		the web.  These are always strings.
	- raw -- handed right through, whatever it is.  For target formats that
		can't do this, usually strings are used.
	- unicode -- this is TEXT in the database, but while normal text will
	  be rendered as byte strings in VOTables (with non-ASCII-characters
	  replaced by ?), unicode will become an array of unicodeChars.

This module contains a base class and the VOTable type system conversion,
as the VOTable module (that should not depend on base) depends on it.
The remaining actual converters are in base.typesystems, as they may depend
on details of base.  Even the SQL converters should be taken from there
when code can rely on gavo.base; this module should be considered an
implementation detail.

import re

from gavo.utils import excs

class ConversionError(excs.Error):
    """is raised by the converters in this module when no type in the
    target type system corresponds to the type passed in.
    """
class FromSQLConverter(object):
    """is an abstract base class for type converters from the SQL type system.

    Implementing classes have to provide a dict simpleMap mapping sql type
    strings to target types, and a method mapComplex that receives a type
    and a length (both strings, derived from SQL array types) and either
    returns None (no matching type) or the target type.

    Implementing classes should also provide a typeSystem attribute giving
    a short name of the type system they convert to.
    """
    _charTypes = set(["character varying", "varchar", "character", "char"])

    def _parseSQLDim(self, subscript):
        """returns a VOTable-style arraysize specification for a SQL
        subscript expression.

        subscript is the bracketed (or parenthesised) part of an SQL type,
        e.g., "[2][]" or "(20)"; the dimensions found are joined with "x",
        and an empty last dimension becomes "*".

        A ValueError is raised if no dimension can be parsed from subscript
        or if a dimension other than the last one has variable length.
        """
        parts = re.findall(r"[\[(](\d*|\*)[\])]", subscript)
        if not parts:
            # convert's RE is more permissive than the one used here, so
            # we may be handed things we cannot parse; say so rather than
            # failing with an IndexError below.
            raise ValueError(
                "Cannot parse SQL array subscript %r" % subscript)

        # In VOTable, we can only have * on the last dimension; we'll error
        # out for now if we are supposed to make some other type
        for part in parts[:-1]:
            if part in ("", "*"):
                raise ValueError(
                    "Arrays may only have variable length in the last dimension")

        if parts[-1] == "":
            parts[-1] = "*"
        return "x".join(parts)

    def convert(self, sqlType):
        """returns the type in the target type system for sqlType.

        Types in simpleMap are looked up directly.  Anything else that looks
        like a base type followed by a subscript is split up and handed to
        mapComplex as (base type, arraysize).

        If no type can be worked out, "raw" is handed through as "raw"; for
        everything else, a ConversionError is raised.
        """
        res = None
        if sqlType in self.simpleMap:
            res = self.simpleMap[sqlType]
        else:
            # TODO: the RE is too permissive and will let through things
            # _parseSQLDim cannot parse.  Perhaps change the whole thing
            # to pyparsing?
            mat = re.match(r"([^[(]*)([\[(]([0-9*\[\]()]*)[\])])$", sqlType)
            if mat:
                # group 1 is the base type, group 2 the complete subscript
                # including its brackets (which _parseSQLDim needs).
                res = self.mapComplex(
                    mat.group(1),
                    self._parseSQLDim(mat.group(2)))

        if res is None:
            if sqlType == "raw":
                return "raw"
            raise ConversionError(
                "No %s type for %s" % (self.typeSystem, sqlType))
        return res

    def mapComplex(self, type, length):
        """returns the target type for an SQL type with a subscript, or
        None if there is no matching type.

        This default implementation knows no complex types at all;
        implementing classes are expected to override it.
        """
        return None
class ToVOTableConverter(FromSQLConverter):
    """is a converter from SQL types to VOTable types.

    The target types are triples; judging from how they are built and
    unpacked in mapComplex, these are (datatype, arraysize, xtype), where
    an arraysize of None is used for non-array values.
    """
    typeSystem = "VOTable"

    simpleMap = {
        "smallint": ("short", None, None),
        "integer": ("int", None, None),
        "bigint": ("long", None, None),
        "real": ("float", None, None),
        "boolean": ("boolean", None, None),
        "double precision": ("double", None, None),
        "text": ("char", "*", None),
        "char": ("char", "1", None),  # Note: "1" is TOPCAT workaround of 2018.
        "date": ("char", "*", None),
        "timestamp": ("char", "19", "timestamp"),
        "time": ("char", "*", None),
        "box": ("double", "*", None),
        "vexpr-mjd": ("char", "*", None),
        "vexpr-string": ("char", "*", None),
        "vexpr-date": ("char", "*", None),
        "vexpr-float": ("char", "*", None),
        "file": ("bytea", "*", None),  # this is for (lame) metadata generation
        "pql-float": ("char", "*", None),
        "pql-string": ("char", "*", None),
        "pql-date": ("char", "*", None),
        "pql-int": ("char", "*", None),
        "pql-upload": ("char", "*", None),  # (the upload parameter)
        "raw": ("unsignedByte", "*", None),
        "bytea": ("unsignedByte", None, None),
        "spoint": ("double", "2", "point"),
        "scircle": ("double", "3", "circle"),
        "spoly": ("double", "*", "polygon"),
        "smoc": ("char", "*", "moc"),
        "sbox": ("double", "4", "x-box"),
        "unicode": ("unicodeChar", "*", None),
        "int4range": ("int", "2", "interval"),
    }

    def mapComplex(self, type, length):
        """returns a VOTable type triple for the SQL base type with the
        arraysize length.

        Character types simply become char arrays of length.  For other
        base types, the element type is taken from simpleMap, and length
        is appended to whatever arraysize the element type already has.

        An excs.StructureError is raised for base types not in simpleMap.
        """
        if length == '':
            length = '*'

        if type in self._charTypes:
            return "char", length, None

        # consequence of TOPCAT workaround as of 2018
        # NOTE(review): "char" is in _charTypes and hence has already been
        # returned above, so this branch looks unreachable right now.
        if type == "char" and length == '1':
            length = None

        # timestamp and interval both need xtype, and that's not really
        # defined.  Oh, madness.  Do a secret handshake with the encoder
        # for custom behaviour.
        if type == "timestamp" and length == "2":
            return "char", "19x2", "timestamp-interval"

        if length is not None:
            # Special handling for arrays of something
            if type == "bytea":
                return ("unsignedByte", '*', None)

            # we may leave a leading * there, which upstream would have
            # to fix (oh, madness)
            try:
                t, l, xtype = self.simpleMap[type]
            except KeyError:
                raise excs.StructureError(
                    f"Cannot infer a VOTable type for '{type}'")

            if l is None:
                newLength = length
            else:
                newLength = "%sx%s" % (l, length)
            return (t, newLength, xtype)

        raise NotImplementedError(
            "VOTable mapComplex cannot handle %s[%s]" % (type, length))
class FromVOTableConverter(object):
    """is a converter from VOTable types to SQL types.

    simpleMap is keyed by (datatype, arraysize) pairs; xtypeMap maps
    xtypes to SQL types and takes precedence over the datatype.
    """
    typeSystem = "db"

    simpleMap = {
        ("short", '1'): "smallint",
        ("int", '1'): "integer",
        ("long", '1'): "bigint",
        ("float", '1'): "real",
        ("boolean", '1'): "boolean",
        ("double", '1'): "double precision",
        ("char", "*"): "text",
        ("char", '1'): "char",
        ("unsignedByte", '1'): "smallint",
        ("raw", '1'): "raw",
    }

    xtypeMap = {
        "adql:POINT": "spoint",
        "adql:REGION": "spoly",
        "adql:TIMESTAMP": "timestamp",
        "timestamp": "timestamp",
        "point": "spoint",
        "circle": "scircle",
        "polygon": "spoly",
        "moc": "smoc",
        "x-box": "sbox",
    }

    def convert(self, type, arraysize, xtype=None):
        """returns an SQL type for the VOTable datatype type with arraysize
        and, optionally, xtype.

        A known xtype decides the result on its own.  Otherwise, a missing
        or empty arraysize is treated as "1", and the result comes from
        simpleMap or, failing that, from mapComplex.
        """
        xtypeResult = self.xtypeMap.get(xtype)
        if xtypeResult:
            return xtypeResult

        # scalars may come in with arraysize "1", "", or not at all
        if arraysize in ("1", "") or arraysize is None:
            arraysize = "1"

        key = (type, arraysize)
        if key in self.simpleMap:
            return self.simpleMap[key]
        return self.mapComplex(type, arraysize)

    def mapComplex(self, type, arraysize):
        """returns an SQL type for VOTable types not covered by simpleMap.

        Character arrays become text or unicode regardless of their length,
        byte arrays become bytea[], and arrays of the remaining simple types
        become SQL arrays with arraysize as the subscript (a "*" becomes
        an empty subscript).

        A ConversionError is raised if type is not known.
        """
        if arraysize == "*":
            arraysize = ""

        if type == "char":
            return "text"
        if type == "unicodeChar":
            return "unicode"
        if type == "unsignedByte" and arraysize != "1":
            return "bytea[]"

        scalarKey = (type, '1')
        if scalarKey in self.simpleMap:
            return "%s[%s]" % (self.simpleMap[scalarKey], arraysize)

        raise ConversionError("No SQL type for %s, %s" % (type, arraysize))
# Module-level conversion callables: the bound convert methods of one
# instance of each of the converters defined in this module.
sqltypeToVOTable = ToVOTableConverter().convert
voTableToSQLType = FromVOTableConverter().convert