Source code for gavo.grammars.mysqldumpgrammar
"""
A quick-and-dirty (q'n'd) grammar for reading MySQL dumps of moderate size.
"""
#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import re
from gavo import base
from gavo.grammars import common
def guessFieldNames(dump):
    """returns the table name, the column names, and the end position of
    the column list for the first CREATE TABLE statement in a MySQL dump.

    dump is the complete dump as a string.

    The result is a tuple (tableName, names, curPos), where curPos is the
    index into dump just behind the last column definition recognised.

    Only column definitions of the form "`name` type flags" are understood;
    scanning stops at the first piece of text that does not look like that
    (for instance, a PRIMARY KEY clause).

    A base.DataError is raised if no CREATE TABLE statement is found or
    if no column definition could be parsed from it.
    """
    # raw string: "\(" in a plain literal is an invalid escape sequence
    mat = re.search(r"CREATE TABLE `([^`]*)` \(", dump)
    if not mat:
        raise base.DataError("No proper CREATE TABLE statement found")
    tableName = mat.group(1)
    curPos = mat.end()

    names = []
    rowPat = re.compile(
        r"\s*`(?P<name>[^`]*)` (?P<type>[^ ]*) (?P<flags>[^,)]*),?")
    while True:
        mat = rowPat.match(dump, curPos)
        if not mat:
            break
        names.append(mat.group("name"))
        curPos = mat.end()

    # Minimal sanity check: without any column names, no row of the dump
    # can be turned into a record, so fail here with a clear message.
    if not names:
        raise base.DataError("No column definitions found in CREATE TABLE"
            " statement for `%s`"%tableName)
    return tableName, names, curPos
def makeRecord(fieldNames, fieldValues):
    """returns a rawdict mapping fieldNames to the matching fieldValues.

    The two sequences are paired positionally.  A literal NULL becomes
    None; all other values have single quotes removed from both ends.

    No further MySQL quoting is undone here.
    """
    def _unquote(literal):
        # the NULL test comes first so a quoted 'NULL' stays a string
        if literal=="NULL":
            return None
        return literal.strip("'")

    return {
        name: _unquote(literal)
        for name, literal in zip(fieldNames, fieldValues)}
class RowIterator(common.FileRowIterator):
    """A row iterator yielding one rawdict per row of the first table
    defined in a MySQL dump.

    The whole of self.inputFile is read into memory; column names come from
    guessFieldNames, and the rows are taken from all INSERT INTO statements
    for that table following the CREATE TABLE statement.
    """
    def _iterRows(self):
        """yields a rawdict for each value tuple inserted into the table.

        A base.DataError is raised if a VALUES list is not followed by
        a semicolon where the row pattern stops matching.
        """
        dumpContents = self.inputFile.read()
        tableName, fieldNames, curPos = guessFieldNames(dumpContents)

        # The table name is data, not a pattern: escape it so regex
        # metacharacters in it (e.g., $ or .) are matched literally.
        insertionPat = re.compile(
            "INSERT INTO `%s` VALUES"%re.escape(tableName))
        # TODO: handle embedded quotes ('')
        valueRE = "('[^']*'|[^',][^,]*)"
        # one capturing group per column, separated by commas
        rowPat = re.compile(
            r"\s*\(%s\),?"%(",".join([valueRE]*len(fieldNames))))

        while True:
            mat = insertionPat.search(dumpContents, curPos)
            if not mat:
                break
            curPos = mat.end()

            while True:
                mat = rowPat.match(dumpContents, curPos)
                if not mat:
                    # sanity check that we really reached the end of the VALUE
                    # statement
                    if not dumpContents[curPos:curPos+30].strip().startswith(";"):
                        raise base.DataError("Expected VALUES-ending ; char %s;"
                            " found %s instead."%(
                                curPos, repr(dumpContents[curPos: curPos+30])))
                    break
                yield makeRecord(fieldNames, mat.groups())
                curPos = mat.end()
class MySQLDumpGrammar(common.Grammar, common.FileRowAttributes):
    """A grammar pulling information from MySQL dump files.

    WARNING: This is a quick hack.  If you want/need it, please contact
    the authors.

    Right now, this is just an ugly mess of regular expressions making
    many assumptions about the dump file, and it is easily fooled.  The
    entire dump file is also read into memory.

    As grammar semantics do not allow for anything else, rows are only
    ever produced for a single table.  That currently is always the
    first one in the dump, though making this selectable is conceivable.

    Database NULLs are already translated into Nones.

    In short: this may be good enough for simple cases.  For anything
    else, improve this or complain to the authors.
    """
    name_ = "mySQLDumpGrammar"

    rowIterator = RowIterator