Thank you! Your feedback has been delivered
Thank you! Your feedback has been sent

Escaping Python and MySQL insert commands or using parameter queries

Hey everybody,

I'm trying to insert a bulk of hundreds of rows that contain html data into a MySQL server using python. I was too lazy to use parameters in my query so instead I just aggregated a huge string insert command with all the data. The problem is that now it's not easy to escape the html content correctly and it messes up the string I send to MySQL.

I'm too short in time to do it by myself so I would love it if any of you guys could help me to either find the right way to escape the data, or alternatively change the code so it would use parameters in the query.

Thank you so much!

import xml.etree.ElementTree as etree
import MySQLdb;
import re;

column_names = ['Id', 'PostTypeId', 'AcceptedAnswerId', 'ParentId', 'CreationDate', 'Score', 'ViewCount', 'Body', 'OwnerUserId', 'OwnerDisplayName',
                                       'LastEditorUserId', 'LastEditorDisplayName', 'LastEditDate', 'LastActivityDate', 'Title', 'Tags', 'AnswerCount',
                                       'CommentCount', 'FavoriteCount', 'ClosedDate', 'CommunityOwnedDate']  

def get_initial_insert_string():
    string = "INSERT INTO post ("

    for column in column_names:
        string += column + ","
    string = string[:-1] #removes the last "," character

    string += ") VALUES"

    return string

context  = etree.iterparse("D:datafile.xml", events=("start", "end"))

# turn it into an iterator
context = iter(context)

# get the root element
event, root = context.next()


bulkCounter = 0

stringBuilder = get_initial_insert_string()


for event, elem in context:
    if event == "end" and elem.tag == "row":
        if(bulkCounter % 1 == 0 and bulkCounter != 0):
            stringBuilder = stringBuilder[:-1] #removes the last "," character
            db = MySQLdb.connect(host="xx.xxx.xx.xxx", user="user", passwd="passwd", db="mydb")
            db.set_character_set('utf8')

            cursor = db.cursor()
            cursor.execute('SET NAMES utf8;') 
            cursor.execute('SET CHARACTER SET utf8;')
            cursor.execute('SET character_set_connection=utf8;')
            cursor.execute(stringBuilder)
            db.commit()
            print cursor.rowcount
            stringBuilder = get_initial_insert_string()
        bulkCounter += 1

        stringBuilder += "("
        for column in column_names:
            if column in elem.attrib:
                escapedString= re.sub(r"[rnx00x1a\'""]", r"g<0>", elem.attrib[column])
                stringBuilder += ("""+escapedString+ """ +",")
            else:
                stringBuilder += "null,"
        stringBuilder = stringBuilder[:-1] #removes the last "," character
        stringBuilder += "),"

        root.clear()
User Gravatar

Dror

Posted Nov 26 2013 20:25 UTC

$50


  • Assigned To cyber-duck
  • Solved
  • python
    mysql
    python-mysql
  • 2613 Views

1 Replies


Not the prettiest thing, but should work for you.

import xml.etree.ElementTree as etree
import MySQLdb;
import re;

column_names = ['Id', 'PostTypeId', 'AcceptedAnswerId', 'ParentId', 'CreationDate', 'Score', 'ViewCount', 'Body', 'OwnerUserId', 'OwnerDisplayName',
                                       'LastEditorUserId', 'LastEditorDisplayName', 'LastEditDate', 'LastActivityDate', 'Title', 'Tags', 'AnswerCount',
                                       'CommentCount', 'FavoriteCount', 'ClosedDate', 'CommunityOwnedDate']  



def get_row_insert_string():
    string = "INSERT INTO post ("

    for column in column_names:
        string += column + ","
    string = string[:-1] #removes the last "," character

    string += ") VALUES ("

    for column in column_names:
        string += "%s,"
    string = string[:-1] #removes the last "," character
    string += ")"

    return string


context  = etree.iterparse("D:datafile.xml", events=("start", "end"))

# turn it into an iterator
context = iter(context)

# get the root element
event, root = context.next()


bulkCounter = 0


db = MySQLdb.connect(host="xx.xxx.xx.xxx", user="user", passwd="passwd", db="mydb")
db.set_character_set('utf8')

cursor = db.cursor()
cursor.execute('SET NAMES utf8;') 
cursor.execute('SET CHARACTER SET utf8;')
cursor.execute('SET character_set_connection=utf8;')

for event, elem in context:
    if event == "end" and elem.tag == "row":

        #cursor.execute('SET autocommit=0;') 

        if(bulkCounter % 100 == 0 and bulkCounter != 0):

            db.commit()
            print cursor.rowcount
        bulkCounter += 1

        #queryString = get_initial_insert_string() + "($s)";

        cursor.execute(get_row_insert_string(), (elem.attrib.get(column_names[0]), elem.attrib.get(column_names[1])
                       , elem.attrib.get(column_names[2]), elem.attrib.get(column_names[3]), elem.attrib.get(column_names[4]), elem.attrib.get(column_names[5])
                       , elem.attrib.get(column_names[6]), elem.attrib.get(column_names[7]), elem.attrib.get(column_names[8]), elem.attrib.get(column_names[9])
                       , elem.attrib.get(column_names[10]), elem.attrib.get(column_names[11]), elem.attrib.get(column_names[12]), elem.attrib.get(column_names[13])
                       , elem.attrib.get(column_names[14]), elem.attrib.get(column_names[15]), elem.attrib.get(column_names[16]), elem.attrib.get(column_names[17])
                       , elem.attrib.get(column_names[18]), elem.attrib.get(column_names[19]), elem.attrib.get(column_names[20])))



        root.clear()
User Gravatar

cyber-duck

Posted Nov 27 2013 23:47 UTC

Solution

This didn't solve your task? Get your own custom solution.

Add a reply

By posting a reply on CodersClan you agree to our Terms & Conditions