Pandas: to_sql UnicodeEncodeError

Created on 1 Sep 2015  ·  3 Comments  ·  Source: pandas-dev/pandas

Writing a DataFrame to SQL with to_sql raises a UnicodeEncodeError, even though Python's default encoding (as reported by sys.getdefaultencoding()) is set to "utf-8" and the MySQL database is also using utf-8 as its default collation. Any ideas? Thanks!

In [6]: df1.to_sql(name='webshoptracks2', con=engine)

UnicodeEncodeError Traceback (most recent call last)
in ()
----> 1 df1.to_sql(name='webshoptracks2', con=engine)

C:\Python27\lib\site-packages\pandas\core\generic.pyc in to_sql(self, name, con,
flavor, schema, if_exists, index, index_label, chunksize, dtype)
980 self, name, con, flavor=flavor, schema=schema, if_exists=if_
exists,
981 index=index, index_label=index_label, chunksize=chunksize,
--> 982 dtype=dtype)
983
984 def to_pickle(self, path):

C:\Python27\lib\site-packages\pandas\io\sql.pyc in to_sql(frame, name, con, flav
or, schema, if_exists, index, index_label, chunksize, dtype)
547 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
548 index_label=index_label, schema=schema,
--> 549 chunksize=chunksize, dtype=dtype)
550
551

C:\Python27\lib\site-packages\pandas\io\sql.pyc in to_sql(self, frame, name, if_
exists, index, index_label, schema, chunksize, dtype)
1186 schema=schema, dtype=dtype)
1187 table.create()
-> 1188 table.insert(chunksize)
1189 # check for potentially case sensitivity issues (GH7815)
1190 if name not in self.engine.table_names(schema=schema or self.met
a.schema):

C:\Python27\lib\site-packages\pandas\io\sql.pyc in insert(self, chunksize)
726
727 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_li
st])
--> 728 self._execute_insert(conn, keys, chunk_iter)
729
730 def _query_iterator(self, result, chunksize, columns, coerce_float=T
rue,

C:\Python27\lib\site-packages\pandas\io\sql.pyc in _execute_insert(self, conn, k
eys, data_iter)
701 def _execute_insert(self, conn, keys, data_iter):
702 data = [dict((k, v) for k, v in zip(keys, row)) for row in data_
iter]
--> 703 conn.execute(self.insert_statement(), data)
704
705 def insert(self, chunksize=None):

C:\Python27\lib\site-packages\sqlalchemy\engine\base.pyc in execute(self, object
, _multiparams, *_params)
912 type(object))
913 else:
--> 914 return meth(self, multiparams, params)
915
916 def _execute_function(self, func, multiparams, params):

C:\Python27\lib\site-packages\sqlalchemy\sql\elements.pyc in _execute_on_connect
ion(self, connection, multiparams, params)
321
322 def _execute_on_connection(self, connection, multiparams, params):
--> 323 return connection._execute_clauseelement(self, multiparams, para
ms)
324
325 def unique_params(self, _optionaldict, *_kwargs):

C:\Python27\lib\site-packages\sqlalchemy\engine\base.pyc in _execute_clauseeleme
nt(self, elem, multiparams, params)
1008 compiled_sql,
1009 distilled_params,
-> 1010 compiled_sql, distilled_params
1011 )
1012 if self._has_events or self.engine._has_events:

C:\Python27\lib\site-packages\sqlalchemy\engine\base.pyc in _execute_context(sel
f, dialect, constructor, statement, parameters, *args)
1144 parameters,
1145 cursor,
-> 1146 context)
1147
1148 if self._has_events or self.engine._has_events:

C:\Python27\lib\site-packages\sqlalchemy\engine\base.pyc in _handle_dbapi_except
ion(self, e, statement, parameters, cursor, context)
1342 )
1343 else:
-> 1344 util.reraise(*exc_info)
1345
1346 finally:

C:\Python27\lib\site-packages\sqlalchemy\engine\base.pyc in _execute_context(sel
f, dialect, constructor, statement, parameters, *args)
1114 statement,
1115 parameters,
-> 1116 context)
1117 elif not parameters and context.no_parameters:
1118 if self.dialect._has_events:

C:\Python27\lib\site-packages\sqlalchemy\dialects\mysql\mysqldb.pyc in do_execut
emany(self, cursor, statement, parameters, context)
93
94 def do_executemany(self, cursor, statement, parameters, context=None
):
---> 95 rowcount = cursor.executemany(statement, parameters)
96 if context is not None:
97 context._rowcount = rowcount

C:\Python27\lib\site-packages\pymysql\cursors.pyc in executemany(self, query, ar
gs)
153 return self._do_execute_many(q_prefix, q_values, q_postfix,
args,
154 self.max_stmt_length,
--> 155 self._get_db().encoding)
156
157 self.rowcount = sum(self.execute(query, arg) for arg in args)

C:\Python27\lib\site-packages\pymysql\cursors.pyc in _do_execute_many(self, pref
ix, values, postfix, args, max_stmt_length, encoding)
179 if isinstance(v, text_type):
180 if PY2:
--> 181 v = v.encode(encoding)
182 else:
183 v = v.encode(encoding, 'surrogateescape')

UnicodeEncodeError: 'latin-1' codec can't encode characters in position 433-438:
ordinal not in range(256)

IO SQL

Most helpful comment

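Thank you! Adding '?charset=utf8' to the end of the connection URL passed to create_engine() (for example, 'mysql+pymysql://user:password@host/dbname?charset=utf8') solved the problem.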

All 3 comments

Did you try configuring the engine object as well? See http://docs.sqlalchemy.org/en/rel_1_0/core/engines.html#engine-creation-api

And can you provide a reproducible example? (a small dataframe that gives you this error)

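Thank you! Adding '?charset=utf8' to the end of the connection URL passed to create_engine() (for example, 'mysql+pymysql://user:password@host/dbname?charset=utf8') solved the problem.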

Was this page helpful?
0 / 5 - 0 ratings