Package gavo :: Package formats :: Module votableread
[frames | no frames]

Source Code for Module gavo.formats.votableread

  1  """ 
  2  Parsing and translating VOTables to internal data structures. 
  3   
  4  This is glue code to the more generic votable library.  In general, you 
  5  should access this module through formats.votable. 
  6  """ 
  7   
  8  #c Copyright 2008-2019, the GAVO project 
  9  #c 
 10  #c This program is free software, covered by the GNU GPL.  See the 
 11  #c COPYING file in the source distribution. 
 12   
 13   
 14  import gzip 
 15   
 16  from gavo import base 
 17  from gavo import rsc 
 18  from gavo import rscdef 
 19  from gavo import utils 
 20  from gavo import votable 
 21  from gavo.base import valuemappers 
 22  from gavo.grammars import votablegrammar 
 23  from gavo.votable import V 
 24  from gavo.votable import modelgroups 
 25   
# Shorthand for base.makeStruct; used below to build the rscdef structures
# (Column, Param, TableDef, Make, DataDescriptor) and the VOTable grammar.
MS = base.makeStruct
 27   
class QuotedNameMaker(object):
    """A name maker for makeTableDefForVOTable implementing TAP's requirements.

    Every FIELD name is handed back wrapped in a utils.QuotedName.  Fields
    lacking a name and names that were already handed out are rejected
    with a base.ValidationError.
    """

    def __init__(self):
        # index counts makeName calls; seenNames holds the names handed out.
        self.index = 0
        self.seenNames = set()

    def makeName(self, field):
        """returns a utils.QuotedName for the name attribute of field.

        A base.ValidationError (for "UPLOAD") is raised if field has no name
        or if this maker has returned that name before.
        """
        self.index += 1
        fieldName = getattr(field, "name", None)

        if fieldName is None:
            raise base.ValidationError("Field without name in upload.",
                "UPLOAD")

        if fieldName in self.seenNames:
            raise base.ValidationError("Duplicate column name illegal in"
                " uploaded tables (%s)"%fieldName, "UPLOAD")

        self.seenNames.add(fieldName)
        return utils.QuotedName(fieldName)
# Column names PostgreSQL reserves for its system columns; user tables
# cannot have columns of these names, so uploads have to avoid them.
_PG_RESERVED_COLUMN_NAMES = {
    "oid",
    "tableoid",
    "xmin",
    "cmin",
    "xmax",
    "cmax",
    "ctid",
}
class _ChangedName(str):
    """a sentinel class to tell upstream that a name has been
    changed in a way that must be reflected in a query.

    Instances are ordinary strings otherwise.  AutoQuotedNameMaker returns
    them for names it had to alter, and makeTableDefForVOTable checks for
    this type to decide whether to keep the FIELD's original name in the
    column's originalName attribute.
    """
53 54
class AutoQuotedNameMaker(object):
    """A name maker for makeTableDefForVOTable quoting names as necessary.

    This is for PostgreSQL; it will also avoid PG's reserved column names (oid
    and friends); hence, this is what you should be using to put VOTables
    into postgres tables.

    Three cases are distinguished in makeName:

    - names for which valuemappers.needsQuoting is true come back as
      utils.QuotedName,
    - names matching a reserved PG column name (ignoring case) are
      lowercased, get underscores appended until they are unused, and come
      back as _ChangedName,
    - all other names are returned unchanged.

    Except for the reserved names, duplicate names raise a
    base.ValidationError; for unquoted names, the comparison ignores case.
    """

    def __init__(self, forRowmaker=False):
        # forRowmaker is accepted but not evaluated in this class.
        self.seenNames = set()

    def _claim(self, key, name):
        """marks key as taken; raises a ValidationError quoting name if
        key was taken before.
        """
        if key in self.seenNames:
            raise base.ValidationError("Duplicate column name illegal in"
                " uploaded tables (%s)"%name, "UPLOAD")
        self.seenNames.add(key)

    def makeName(self, field):
        """returns a column name (str, _ChangedName, or utils.QuotedName)
        for field.

        A base.ValidationError is raised for fields without a name and
        for duplicate names.
        """
        name = getattr(field, "name", None)
        if name is None:
            raise base.ValidationError("Field without name in upload.",
                "UPLOAD")

        if valuemappers.needsQuoting(name):
            # quoted names are compared verbatim
            self._claim(name, name)
            return utils.QuotedName(name)

        folded = name.lower()
        if folded in _PG_RESERVED_COLUMN_NAMES:
            # rename away from the system column, keeping the result unique
            candidate = folded+"_"
            while candidate in self.seenNames:
                candidate = candidate+"_"
            self.seenNames.add(candidate)
            return _ChangedName(candidate)

        # plain names are compared with case folded
        self._claim(folded, name)
        return name
94 95
def addQ3CIndex(tableDef):
    """if tableDef has unique main positions (by UCD), add a q3c index to
    the table definition.

    The position columns are looked up through tableDef.getColumnByUCDs,
    once for right ascension and once for declination, each with the
    UCD1+ and the legacy UCD1 spelling.  A ValueError from that lookup is
    taken to mean there are no unique positions.  Nothing is added either
    if one of the columns is not of type real or double precision.

    Returns None; on success, tableDef is modified by applying the
    //scs#q3cindex mixin.
    """
    floatTypes = ("real", "double precision")

    try:
        raField = tableDef.getColumnByUCDs("pos.eq.ra;meta.main",
            "POS_EQ_RA_MAIN")
        # The legacy UCD for declination is POS_EQ_DEC_MAIN; looking for
        # POS_EQ_RA_MAIN here would pick the RA column a second time.
        decField = tableDef.getColumnByUCDs("pos.eq.dec;meta.main",
            "POS_EQ_DEC_MAIN")
    except ValueError:  # No unique positions
        return

    if raField.type not in floatTypes or decField.type not in floatTypes:
        # Don't index non-floats
        return

    base.resolveId(None, "//scs#q3cindex").applyToFinished(tableDef)
111 112
def _getValuesFromField(votField):
    """returns an rscdef.Values instance built from the VALUES children
    of votField, or None if they yield nothing.

    The null literal, MIN, MAX and (non-nested) OPTION children are
    evaluated; where several elements of a kind are present, the last one
    wins.
    """
    collected = {}

    for valuesEl in votField.iterChildrenOfType(V.VALUES):
        nullLiteral = valuesEl.null
        if nullLiteral is not None:
            collected["nullLiteral"] = nullLiteral

        for minEl in valuesEl.iterChildrenOfType(V.MIN):
            collected["min"] = minEl.value

        for maxEl in valuesEl.iterChildrenOfType(V.MAX):
            collected["max"] = maxEl.value

        # We don't support nested options in rscdef.
        options = []
        for optionEl in valuesEl.iterChildrenOfType(V.OPTION):
            optionArgs = {"content_": optionEl.value}
            if optionEl.name:
                optionArgs["title"] = optionEl.name
            options.append(base.makeStruct(rscdef.Option, **optionArgs))

        if options:
            collected["options"] = options

    if not collected:
        return None
    return base.makeStruct(rscdef.Values, **collected)
140 141
def _getColArgs(votInstance, name):
    """returns a dictionary of constructor arguments for an RD column or
    param built from the VOTable FIELD or PARAM votInstance.

    name is the name the new column or param is to have; the table head is
    derived from it.  ucd, unit, xtype, a value (as content_), a values
    element, and the description are only included when votInstance
    provides them.
    """
    colArgs = {
        "name": name,
        "tablehead": name.capitalize(),
        "id": getattr(votInstance, "ID", None),
        "type": base.voTableToSQLType(
            votInstance.datatype, votInstance.arraysize, votInstance.xtype),
    }

    # attributes that are copied over verbatim when present
    for attName in ("ucd", "unit", "xtype"):
        attValue = getattr(votInstance, attName, None)
        if attValue is not None:
            colArgs[attName] = attValue

    content = getattr(votInstance, "value", None)
    if content is not None:
        colArgs["content_"] = content

    valuesDef = _getValuesFromField(votInstance)
    if valuesDef:
        colArgs["values"] = valuesDef

    # with several DESCRIPTION children, the last one is used
    for descEl in votInstance.iterChildrenOfType(V.DESCRIPTION):
        colArgs["description"] = descEl.text_

    return colArgs
166 167
def makeTableDefForVOTable(tableId, votTable, nameMaker=None, rd=None,
        **moreArgs):
    """returns a TableDef for a Table element parsed from a VOTable.

    tableId becomes the id of the new table definition, rd its parent.
    Pass additional constructor arguments for the table in moreArgs.

    nameMaker is an optional argument; if given, it must be an object
    having a makeName(field) -> string or utils.QuotedName method.
    It must return unique objects from VOTable fields and do that
    reproducibly, i.e., for a given field the same name is returned.

    The default is valuemappers.VOTNameMaker.  When building TDs for Postgres,
    use AutoQuotedNameMaker to generate valid column names.

    As an extra service, in particular for ADQL name resolving, columns
    for which the name maker returned a _ChangedName have an attribute
    originalName containing whatever was originally in the FIELD's @name.

    PARAMs that cannot be translated are reported through
    base.ui.notifyError and otherwise ignored.

    If unique "main" positions are given, a spatial q3c index will be
    added.  STC information found in the VOTable is attached to the
    columns it references in their stc and stcUtype attributes.
    """
    if nameMaker is None:
        nameMaker = valuemappers.VOTNameMaker()

    # one column per FIELD
    columns = []
    for field in votTable.iterChildrenOfType(V.FIELD):
        colName = nameMaker.makeName(field)
        column = MS(rscdef.Column, **_getColArgs(field, colName))
        # tell the ADQL machinery if we've significantly modified the
        # name (i.e., more than just quoting).
        if isinstance(colName, _ChangedName):
            column.originalName = field.name
        columns.append(column)

    # one param per PARAM we can digest
    params = []
    for param in votTable.iterChildrenOfType(V.PARAM):
        try:
            paramDef = MS(rscdef.Param, **_getColArgs(param, param.name))
        except Exception as ex:  # never die because of failing params
            base.ui.notifyError("Unsupported PARAM ignored (%s)"%ex)
        else:
            params.append(paramDef)

    tableDef = MS(rscdef.TableDef, id=tableId, columns=columns,
        params=params, parent_=rd, **moreArgs)
    addQ3CIndex(tableDef)

    # attach STC info to the columns referenced by id
    for colInfo, ast in modelgroups.unmarshal_STC(votTable):
        for colId, utype in colInfo.iteritems():
            try:
                stcColumn = tableDef.getColumnById(colId)
                stcColumn.stcUtype = utype
                stcColumn.stc = ast
            except utils.NotFoundError:  # ignore broken STC
                continue

    return tableDef
229 230
def makeDDForVOTable(tableId, vot, gunzip=False, rd=None, **moreArgs):
    """returns a DD suitable for uploadVOTable.

    The data descriptor has a VOTableGrammar (to which gunzip is passed
    on) and at most one make.  moreArgs are additional keywords for the
    construction of the target table.

    Only the first table of the first resource is turned into a make; if
    that resource has no table, the DD has no makes at all.
    """
    makes = []
    for resource in vot.iterChildrenOfType(V.RESOURCE):
        for table in resource.iterChildrenOfType(V.TABLE):
            tableDef = makeTableDefForVOTable(
                tableId, table, rd=rd, **moreArgs)
            makes.append(MS(rscdef.Make, table=tableDef))
            break  # later tables are ignored
        break  # later resources are ignored

    grammar = MS(votablegrammar.VOTableGrammar, gunzip=gunzip)
    return MS(rscdef.DataDescriptor, grammar=grammar, makes=makes)
254 255
def _getRowMaker(table):
    """returns a function turning a VOTable tuple to a database row
    for table.

    The function is generated as source code and compiled through
    utils.compileFunction.  It takes a sequence row and returns a
    dictionary in which the key attribute of the i-th item yielded by
    table.tableDef maps to row[i].

    NOTE(review): the previous version of this docstring said xtyped
    columns are parsed here as well.  The code generated below only copies
    values; the parsers imported at the top are made available to it but
    are not referenced.
    """
    # Not used in this function's own code; they are part of the locals()
    # handed to compileFunction below (hence the noflake markers).
    from gavo.base.literals import parseDefaultDatetime #noflake: code gen
    from gavo.stc import parseSimpleSTCS, simpleSTCSToPolygon #noflake: code gen

    # one "key: row[index]" dictionary item per column
    parts = []
    for colInd, col in enumerate(table.tableDef):
        valCode = "row[%d]"%colInd
        parts.append("%s: %s"%(repr(col.key), valCode))

    # locals() is passed on as-is -- presumably as the namespace the
    # generated code sees; verify against utils.compileFunction.  Renaming
    # locals here changes what is handed over.
    return utils.compileFunction(
        "def makeRow(row):\n return {%s}"%(", ".join(parts)),
        "makeRow",
        locals())
275 276
def uploadVOTable(tableId, srcFile, connection, gunzip=False,
        rd=None, **tableArgs):
    """creates a temporary table with tableId containing the first
    table in the VOTable in srcFile.

    srcFile must be an open file object (or some similar object); with
    gunzip, it is decompressed on the fly.  connection is the database
    connection the table is created on, rd becomes the parent of the table
    definition.  tableArgs override or extend the default table arguments
    (onDisk and temporary both true).

    The function returns a DBTable instance for the new table, or None if
    the VOTable contains no table at all.
    """
    if gunzip:
        srcFile = gzip.GzipFile(fileobj=srcFile, mode="r")

    try:
        # builtin next() rather than the Python 2-only .next() method
        rows = next(votable.parse(srcFile, raiseOnInvalid=False))
    except StopIteration:  # no table contained, not our problem
        return None

    args = {"onDisk": True, "temporary": True}
    args.update(tableArgs)
    td = makeTableDefForVOTable(tableId, rows.tableDefinition,
        rd=rd, **args)

    table = rsc.TableForDef(td, connection=connection, create=True)
    makeRow = _getRowMaker(table)
    with table.getFeeder() as feeder:
        # the loop variable must not shadow the builtin tuple
        for rawRow in rows:
            feeder.add(makeRow(rawRow))
    return table
304