Posted By

eristoddle on 06/30/11


Tagged


Versions (?)

Python CSV to MySQL


 / Published in: Python
 

  1. #!/usr/bin/env python
  2.  
  3. # Run with no args for usage instructions
  4. #
  5. # Notes:
  6. # - will probably insert duplicate records if you load the same file twice
  7. # - assumes that the number of fields in the header row is the same
  8. # as the number of columns in the rest of the file and in the database
  9. # - assumes the column order is the same in the file and in the database
  10. #
  11. # Speed: ~ 1s/MB
  12. #
  13.  
  14. import sys
  15. import MySQLdb
  16. import csv
  17.  
  18. def main(user, db, table, csvfile):
  19.  
  20. try:
  21. conn = getconn(user, db,)
  22. except MySQLdb.Error, e:
  23. print "Error %d: %s" % (e.args[0], e.args[1])
  24. sys.exit (1)
  25.  
  26. cursor = conn.cursor()
  27.  
  28. loadcsv(cursor, table, csvfile)
  29.  
  30. cursor.close()
  31. conn.close()
  32.  
  33. def getconn(user, db, passwd=""):
  34. conn = MySQLdb.connect(host = "localhost",
  35. user = user,
  36. passwd = "",
  37. db = db)
  38. return conn
  39.  
  40. def nullify(L):
  41. """Convert empty strings in the given list to None."""
  42.  
  43. # helper function
  44. def f(x):
  45. if(x == ""):
  46. return None
  47. else:
  48. return x
  49.  
  50. return [f(x) for x in L]
  51.  
  52. def loadcsv(cursor, table, filename):
  53.  
  54. """
  55. Open a csv file and load it into a sql table.
  56. Assumptions:
  57. - the first line in the file is a header
  58. """
  59.  
  60. f = csv.reader(open(filename))
  61.  
  62. header = f.next()
  63. numfields = len(header)
  64.  
  65. query = buildInsertCmd(table, numfields)
  66.  
  67. for line in f:
  68. vals = nullify(line)
  69. cursor.execute(query, vals)
  70.  
  71. return
  72.  
  73. def buildInsertCmd(table, numfields):
  74.  
  75. """
  76. Create a query string with the given table name and the right
  77. number of format placeholders.
  78.  
  79. example:
  80. >>> buildInsertCmd("foo", 3)
  81. 'insert into foo values (%s, %s, %s)'
  82. """
  83. assert(numfields > 0)
  84. placeholders = (numfields-1) * "%s, " + "%s"
  85. query = ("insert into %s" % table) + (" values (%s)" % placeholders)
  86. return query
  87.  
  88. if __name__ == '__main__':
  89. # commandline execution
  90.  
  91. args = sys.argv[1:]
  92. if(len(args) < 4):
  93. print "error: arguments: user db table csvfile"
  94. sys.exit(1)
  95.  
  96. main(*args)
  97.  

Report this snippet  

You need to login to post a comment.