2014-01-06 4 views
1

Я новичок в Java. Я использую Apache POI, чтобы вставить данные из листа Excel в MySQL. На стороне бэкенда я создал таблицу в MySQL и подключил все необходимые jar-файлы. Проблемы, с которыми я сталкиваюсь: 1. Значение даты не вставляется. 2. Когда я удаляю столбец с датой, вставляется только адрес ячейки, а не её значение. 3. Иногда сообщается, что источник не найден. Пожалуйста, помогите мне с этой проблемой. Чтение листа Excel и вставка его в базу данных MySQL

public static ArrayList readExcelFile(String fileName) throws SQLException 
{ 
/** --Define a ArrayList 
    --Holds ArrayList Of Cells 

*/ 

ArrayList cellArrayLisstHolder = new ArrayList(); 


enter code here 
try{ 
/** Creating Input Stream**/ 
    FileInputStream myInput = new FileInputStream(fileName); 

/** Create a POIFSFileSystem object**/ 
POIFSFileSystem myFileSystem = new POIFSFileSystem(myInput); 

/** Create a workbook using the File System**/ 
HSSFWorkbook myWorkBook = new HSSFWorkbook(myFileSystem); 

/** Get the first sheet from workbook**/ 
HSSFSheet mySheet = myWorkBook.getSheetAt(0); 

/** We now need something to iterate through the cells.**/ 
    Iterator rowIter = mySheet.rowIterator(); 
    while(rowIter.hasNext()){ 
     HSSFRow myRow = (HSSFRow) rowIter.next(); 
     Iterator cellIter = myRow.cellIterator(); 
     ArrayList cellStoreArrayList=new ArrayList(); 
     while(cellIter.hasNext()){ 
      HSSFCell myCell = (HSSFCell) cellIter.next(); 
      HSSFCell cell = myRow.createCell((short) 0); 
      cell.setCellType(HSSFCell.CELL_TYPE_NUMERIC); 

      SimpleDateFormat datetemp = new SimpleDateFormat("mm-dd-yyyy"); 
      Date cellValue = datetemp.parse("1994-01-01"); 
      cell.setCellValue(cellValue); 

      //binds the style you need to the cell. 
      HSSFCellStyle dateCellStyle = myWorkBook.createCellStyle(); 
      short df = myWorkBook.createDataFormat().getFormat("dd-mmm"); 
      dateCellStyle.setDataFormat(df); 
      cell.setCellStyle(dateCellStyle); 
      cellStoreArrayList.add(myCell); 

        } 
     cellArrayLisstHolder.add(cellStoreArrayList); 
    } 
}catch (Exception e){e.printStackTrace(); } 
return cellArrayLisstHolder; 
}%> 
<% 

File f = new File("DeptHosp.xls"); 
System.out.println(f.getAbsolutePath()); 

File file = new File("."); 
for(String fileNames : file.list()) System.out.println(fileNames); 
String fileName="D://PROJECT//SOFTWARES//eclipse_Juno//eclipse//DeptHosp.xls";      
//Read an Excel File and Store in a ArrayList 
System.out.println(" path found"); 
ArrayList dataHolder=readExcelFile(fileName); 
//Print the data read 
//printCellDataToConsole(dataHolder); 
    con=connection.getConn(); 
    System.out.println("Inserting the details"); 
    String query= 
    "insert into      departmentmaster(Dept_id,Dept_Groupid,Dept_Kid,Dept_Groupkid,Dept_Group,Dept_Name,Dept_type  ,Dept_HospitalId,Dept_Datecreated,Dept_datelastrefreshed)values(?,?,?,?,?,?,?,?,?,?)"; 
    ps=con.prepareStatement(query); 
System.out.println("Database"); 
int count=0; 
ArrayList cellStoreArrayList=null; 
Date datevalue=null; 

    //For inserting into database 
    for (int i=1;i < dataHolder.size(); i++) { 
    cellStoreArrayList=(ArrayList)dataHolder.get(i); 


    ps.setString(1,((HSSFCell)cellStoreArrayList.get(0)).getStringCellValue()); 
    ps.setString(2,((HSSFCell)cellStoreArrayList.get(1)).getStringCellValue()); 
    ps.setString(3,((HSSFCell)cellStoreArrayList.get(2)).getStringCellValue()); 
    ps.setString(4,((HSSFCell)cellStoreArrayList.get(3)).getStringCellValue()); 
    ps.setString(5,((HSSFCell)cellStoreArrayList.get(4)).getStringCellValue()); 
    ps.setString(6,((HSSFCell)cellStoreArrayList.get(5)).getStringCellValue()); 
    ps.setString(7,((HSSFCell)cellStoreArrayList.get(6)).getStringCellValue()); 
    ps.setString(8,((HSSFCell)cellStoreArrayList.get(7)).getStringCellValue()); 
    ps.setString(9,((HSSFCell)cellStoreArrayList.get(8)).getStringCellValue()); 
    ps.setString(10,((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue()); 

    count= ps.executeUpdate(); 
    System.out.println(((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue() + "\t"); 
    } 
    //For checking data is inserted or not? 
    if(count>0) 
    { %> 

       <table> 
        <tr> 
         <th>Dept_Id</th> 
         <th>Dept_GroupId</th> 
         <th>Dept_KId</th> 
         <th>Dept_GroupKid</th> 
         <th>Dept_Group</th> 
         <th>Dept_Name</th> 
         <th>Dept_Type</th> 
         <th>Hospital_Id</th> 
         <th>Dept_datecreated</th> 
         <th>Dept_datelastrefreshed</th> 


        </tr> 

    <% for (int i=1;i < dataHolder.size(); i++) { 
cellStoreArrayList=(ArrayList)dataHolder.get(i);%> 
<tr> 
    <td><%=((HSSFCell)cellStoreArrayList.get(0)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(1)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(2)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(3)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(4)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(5)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(6)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(7)).getStringCellValue() %></td> 
    <td><%=((HSSFCell)cellStoreArrayList.get(8)).getStringCellValue() %></td> 
     <td><%=((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue() %></td> 




</tr> 
    <%} 
    } 
else 
{%> 
<center> Details have not been inserted!!!!!!!!!</center> 

<% }%> 

Ошибка, которую я получаю: Jan 6, 2014 12:05:00 PM org.apache.catalina.core.StandardWrapperValve invoke SEVERE: Servlet.service() for servlet [jsp] in context with path [/Excel] threw exception [An exception occurred processing JSP page /Excel.jsp at line 139

136:  ps.setString(6,((HSSFCell)cellStoreArrayList.get(5)).getStringCellValue()); 
137:  ps.setString(7,((HSSFCell)cellStoreArrayList.get(6)).getStringCellValue()); 
138:  ps.setString(8,((HSSFCell)cellStoreArrayList.get(7)).getStringCellValue()); 
139:  ps.setString(9,((HSSFCell)cellStoreArrayList.get(8)).getStringCellValue()); 
140:  ps.setString(10, ((HSSFCell)cellStoreArrayList.get(9)).getStringCellValue()); 
141:   
142:  count= ps.executeUpdate(); 


    Stacktrace:] with root cause 
    java.lang.IndexOutOfBoundsException: Index: 8, Size: 8 
at java.util.ArrayList.rangeCheck(Unknown Source) 
at java.util.ArrayList.get(Unknown Source) 
at org.apache.jsp.Excel_jsp._jspService(Excel_jsp.java:234) 
at org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:70) 
at javax.servlet.http.HttpServlet.service(HttpServlet.java:728) 
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:432) 
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:390) 
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:334) 
at javax.servlet.http.HttpServlet.service(HttpServlet.java:728) 
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:305) 
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210) 
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:51) 
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243) 
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210) 
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:222) 
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:123) 
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:502) 
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:171) 
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:100) 
at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:953) 
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118) 
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:408) 
at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1041) 
at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:603) 
at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:310) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) 
at java.lang.Thread.run(Unknown Source) 
+1

Вам следует привести точный текст ошибки или стек-трейс исключения, если он есть –

+0

да .. вам нужно.. сообщите нам, где вы получаете эти проблемы/ошибки ... – TheLostMind

ответ

0

Я не могу оставить комментарий к вашему вопросу, поэтому попробую ответить, исходя из своего понимания: у вас проблемы с чтением/записью полей DATE в Excel.

Думаю, вы упускаете особенность полей с датой: Excel хранит даты внутри как числа и лишь при отображении показывает отформатированную дату. Также нужно убедиться, что на листе, который вы читаете, столбец отформатирован как DATE.

Чтобы прочитать содержимое как DATE, используйте приведённый ниже фрагмент кода.

если (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {

// Excel stores the Date as a Numeric Contents. POI provides 
// a Date utility to check 
// if a particular cell is formatted as a date. 
if (DateUtil.isCellDateFormatted(cell)) { 
    Date date = DateUtil.getJavaDate((double) cell 
     .getNumericCellValue()); 
    SimpleDateFormat df = new SimpleDateFormat("dd/MM/yyyy HH:mm z"); 
    System.out.println("The cell is a Date : " + df.format(date)); 
} else { 
    // treat the cell as 'double' number 
    System.out.println("The cell is a number : " 
     + cell.getNumericCellValue()); 
} 

}

Полный фрагмент кода можно посмотреть по этой ссылке. Обратите внимание, что страницу по ссылке создал я сам.

+0

Да, сэр. Я сделал то же самое, но он дает мне ошибку: «Вы не можете получить строковое значение из числовой ячейки» –

+0

Возможно, стоит вручную задать для ячейки тип Date в Excel и попробовать фрагмент кода ещё раз? –

+0

Я тоже это сделал. Проблема связана с ps.setString (9, ((HSSFCell) cellStoreArrayList.get (8)). GetStringCellValue()); ps.setString (10, ((HSSFCell) cellStoreArrayList.get (9)). GetStringCellValue()); –

0

Можете ли вы использовать Jython вместо POI? Если да, то приведённый ниже код выдаёт CSV, который затем можно загрузить в MySQL с помощью [загрузчика](http://dev.mysql.com/doc/refman/5.1/en/mysqlimport.html) командой, указанной после кода:

#!/usr/bin/python 
# 
# Copyright information 
# 
# Copyright (C) 2013-2014 Hasan Diwan 
# This program is free software; you can redistribute it and/or modify 
# This program is distributed in the hope that it will be useful, 
# but WITHOUT ANY WARRANTY; without even the implied warranty of 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
__author__ = "Hasan Diwan <[email protected]>" 
__license__ = "BSD" 
__version__ = "0.1" 

import csv, datetime, zipfile, string, sys, os, re 
import xml.parsers.expat 
from xml.dom import minidom 
try: 
    # python2.4 
    from cStringIO import StringIO 
except: 
    pass 
try: 
    from argparse import ArgumentParser 
except: 
    # python2.4 
    from optparse import OptionParser 

# see also ruby-roo lib at: http://github.com/hmcgowan/roo 
FORMATS = { 
    'general' : 'float', 
    '0' : 'float', 
    '0.00' : 'float', 
    '#,##0' : 'float', 
    '#,##0.00' : 'float', 
    '0%' : 'percentage', 
    '0.00%' : 'percentage', 
    '0.00e+00' : 'float', 
    'mm-dd-yy' : 'date', 
    'd-mmm-yy' : 'date', 
    'd-mmm' : 'date', 
    'mmm-yy' : 'date', 
    'h:mm am/pm' : 'date', 
    'h:mm:ss am/pm' : 'date', 
    'h:mm' : 'time', 
    'h:mm:ss' : 'time', 
    'm/d/yy h:mm' : 'date', 
    '#,##0 ;(#,##0)' : 'float', 
    '#,##0 ;[red](#,##0)' : 'float', 
    '#,##0.00;(#,##0.00)' : 'float', 
    '#,##0.00;[red](#,##0.00)' : 'float', 
    'mm:ss' : 'time', 
    '[h]:mm:ss' : 'time', 
    'mmss.0' : 'time', 
    '##0.0e+0' : 'float', 
    '@' : 'float', 
    'yyyy\\-mm\\-dd' : 'date', 
    'dd/mm/yy' : 'date', 
    'hh:mm:ss' : 'time', 
    "dd/mm/yy\\ hh:mm" : 'date', 
    'dd/mm/yyyy hh:mm:ss' : 'date', 
    'yy-mm-dd' : 'date', 
    'd-mmm-yyyy' : 'date', 
    'm/d/yy' : 'date', 
    'm/d/yyyy' : 'date', 
    'dd-mmm-yyyy' : 'date', 
    'dd/mm/yyyy' : 'date', 
    'mm/dd/yy hh:mm am/pm' : 'date', 
    'mm/dd/yyyy hh:mm:ss' : 'date', 
    'yyyy-mm-dd hh:mm:ss' : 'date', 
} 
STANDARD_FORMATS = { 
    0 : 'general', 
    1 : '0', 
    2 : '0.00', 
    3 : '#,##0', 
    4 : '#,##0.00', 
    9 : '0%', 
    10 : '0.00%', 
    11 : '0.00e+00', 
    12 : '# ?/?', 
    13 : '# ??/??', 
    14 : 'mm-dd-yy', 
    15 : 'd-mmm-yy', 
    16 : 'd-mmm', 
    17 : 'mmm-yy', 
    18 : 'h:mm am/pm', 
    19 : 'h:mm:ss am/pm', 
    20 : 'h:mm', 
    21 : 'h:mm:ss', 
    22 : 'm/d/yy h:mm', 
    37 : '#,##0 ;(#,##0)', 
    38 : '#,##0 ;[red](#,##0)', 
    39 : '#,##0.00;(#,##0.00)', 
    40 : '#,##0.00;[red](#,##0.00)', 
    45 : 'mm:ss', 
    46 : '[h]:mm:ss', 
    47 : 'mmss.0', 
    48 : '##0.0e+0', 
    49 : '@', 
} 

class XlsxException(Exception): 
    pass 

class InvalidXlsxFileException(XlsxException): 
    pass 

class SheetNotFoundException(XlsxException): 
    pass 

class OutFileAlreadyExistsException(XlsxException): 
    pass 

class Xlsx2csv: 
    """ 
    Usage: Xlsx2csv("test.xslx", **params).convert("test.csv", sheetid=1) 
    parameters: 
     sheetid - sheet no to convert (0 for all sheets) 
     dateformat - override date/time format 
     delimiter - csv columns delimiter symbol 
     sheet_delimiter - sheets delimiter used when processing all sheets 
     skip_empty_lines - skip empty lines 
    """ 

    def __init__(self, xlsxfile, dateformat=None, delimiter=",", sheetdelimiter="--------", skip_empty_lines=False, escape_strings=False, cmd=False): 
     try: 
      self.ziphandle = zipfile.ZipFile(xlsxfile) 
     except (zipfile.BadZipfile, IOError): 
      if cmd: 
       sys.stderr.write("Invalid xlsx file: " + xlsxfile + os.linesep) 
       sys.exit(1) 
      raise InvalidXlsxFileException("Invalid xlsx file: " + xlsxfile) 

     self.dateformat = dateformat 
     self.delimiter = delimiter 
     self.sheetdelimiter = sheetdelimiter 
     self.skip_empty_lines = skip_empty_lines 
     self.cmd = cmd 
     self.py3 = sys.version_info[0] == 3 

     self.shared_strings = self._parse(SharedStrings, "xl/sharedStrings.xml") 
     self.styles = self._parse(Styles, "xl/styles.xml") 
     self.workbook = self._parse(Workbook, "xl/workbook.xml") 
     if escape_strings: 
      self.shared_strings.escape_strings() 

    def convert(self, outfile, sheetid=1): 
     """outfile - path to file or filehandle""" 
     if sheetid > 0: 
      self._convert(sheetid, outfile) 
     else: 
      if isinstance(outfile, str): 
       if not os.path.exists(outfile): 
        os.makedirs(outfile) 
       elif os.path.isfile(outfile): 
        if cmd: 
         sys.stderr.write("File " + outfile + " already exists!" + os.linesep) 
         sys.exit(1) 
        raise OutFileAlreadyExistsException("File " + outfile + " already exists!") 
      for s in self.workbook.sheets: 
       sheetname = s['name'] 
       if not self.py3: 
        sheetname = sheetname.encode('utf-8') 
       of = outfile 
       if isinstance(outfile, str): 
        of = os.path.join(outfile, sheetname + '.csv') 
       elif self.sheetdelimiter and len(self.sheetdelimiter): 
        of.write(self.sheetdelimiter + " " + str(s['id']) + " - " + sheetname + os.linesep) 
       self._convert(s['id'], of) 

    def _convert(self, sheetid, outfile): 
     closefile = False 
     if isinstance(outfile, str): 
      outfile = open(outfile, 'w+') 
      closefile = True 
     try: 
      writer = csv.writer(outfile, quoting=csv.QUOTE_ALL, delimiter=self.delimiter, lineterminator='\r\n') 
      sheetfile = self._filehandle("xl/worksheets/sheet%i.xml" % sheetid) 
      if not sheetfile: 
       if self.cmd: 
        sys.stderr.write("Sheet %s not found!%s" %(sheetid, os.linesep)) 
        sys.exit(1) 
       raise SheetNotFoundException("Sheet %s not found" %sheetid) 
      try: 
       sheet = Sheet(self.workbook, self.shared_strings, self.styles, sheetfile) 
       sheet.set_dateformat(self.dateformat) 
       sheet.set_skip_empty_lines(self.skip_empty_lines) 
       sheet.to_csv(writer) 
      finally: 
       sheetfile.close() 
     finally: 
      if closefile: 
       outfile.close() 

    def _filehandle(self, filename): 
     for name in filter(lambda f: f.lower() == filename.lower(), self.ziphandle.namelist()): 
      # python2.4 fix 
      if not hasattr(self.ziphandle, "open"): 
       return StringIO(self.ziphandle.read(name)) 
      return self.ziphandle.open(name, "r") 
     return None 

    def _parse(self, klass, filename): 
     instance = klass() 
     filehandle = self._filehandle(filename) 
     if filehandle: 
      instance.parse(filehandle) 
      filehandle.close() 
     return instance 

class Workbook: 
    def __init__(self): 
     self.sheets = [] 
     self.date1904 = False 

    def parse(self, filehandle): 
     workbookDoc = minidom.parseString(filehandle.read()) 
     if len(workbookDoc.firstChild.getElementsByTagName("fileVersion")) == 0: 
      self.appName = 'unknown' 
     else: 
      self.appName = workbookDoc.firstChild.getElementsByTagName("fileVersion")[0]._attrs['appName'].value 
     try: 
      self.date1904 = workbookDoc.firstChild.getElementsByTagName("workbookPr")[0]._attrs['date1904'].value.lower().strip() != "false" 
     except: 
      pass 

     sheets = workbookDoc.firstChild.getElementsByTagName("sheets")[0] 
     for sheetNode in sheets.getElementsByTagName("sheet"): 
      attrs = sheetNode._attrs 
      name = attrs["name"].value 
      if self.appName == 'xl': 
       if 'r:id' in attrs: id = int(attrs["r:id"].value[3:]) 
       else: id = int(attrs['sheetId'].value) 
      else: 
       if 'sheetId' in attrs: id = int(attrs["sheetId"].value) 
       else: id = int(attrs['r:id'].value[3:]) 
      self.sheets.append({'name': name, 'id': id}) 

class Styles: 
    def __init__(self): 
     self.numFmts = {} 
     self.cellXfs = [] 

    def parse(self, filehandle): 
     styles = minidom.parseString(filehandle.read()).firstChild 
     # numFmts 
     numFmtsElement = styles.getElementsByTagName("numFmts") 
     if len(numFmtsElement) == 1: 
      for numFmt in numFmtsElement[0].childNodes: 
       if numFmt.nodeType == minidom.Node.ELEMENT_NODE: 
        numFmtId = int(numFmt._attrs['numFmtId'].value) 
        formatCode = numFmt._attrs['formatCode'].value.lower().replace('\\', '') 
        self.numFmts[numFmtId] = formatCode 
     # cellXfs 
     cellXfsElement = styles.getElementsByTagName("cellXfs") 
     if len(cellXfsElement) == 1: 
      for cellXfs in cellXfsElement[0].childNodes: 
       if cellXfs.nodeType != minidom.Node.ELEMENT_NODE or cellXfs.nodeName != "xf": 
        continue 
       if 'numFmtId' in cellXfs._attrs: 
        numFmtId = int(cellXfs._attrs['numFmtId'].value) 
        self.cellXfs.append(numFmtId) 
       else: 
        self.cellXfs.append(None) 

class SharedStrings: 
    def __init__(self): 
     self.parser = None 
     self.strings = [] 
     self.si = False 
     self.t = False 
     self.rPh = False 
     self.value = "" 

    def parse(self, filehandle): 
     self.parser = xml.parsers.expat.ParserCreate() 
     self.parser.CharacterDataHandler = self.handleCharData 
     self.parser.StartElementHandler = self.handleStartElement 
     self.parser.EndElementHandler = self.handleEndElement 
     self.parser.ParseFile(filehandle) 

    def escape_strings(self): 
     for i in range(0, len(self.strings)): 
      self.strings[i] = self.strings[i].replace("\r", "\\r").replace("\n", "\\n").replace("\t", "\\t") 

    def handleCharData(self, data): 
     if self.t: 
      self.value+= data 

    def handleStartElement(self, name, attrs): 
     if name == 'si': 
      self.si = True 
      self.value = "" 
     elif name == 't' and self.rPh: 
      self.t = False 
     elif name == 't' and self.si: 
      self.t = True 
     elif name == 'rPh': 
      self.rPh = True 

    def handleEndElement(self, name): 
     if name == 'si': 
      self.si = False 
      self.strings.append(self.value) 
     elif name == 't': 
      self.t = False 
     elif name == 'rPh': 
      self.rPh = False 

class Sheet: 
    def __init__(self, workbook, sharedString, styles, filehandle): 
     self.py3 = sys.version_info[0] == 3 
     self.parser = None 
     self.writer = None 
     self.sharedString = None 
     self.styles = None 

     self.in_sheet = False 
     self.in_row = False 
     self.in_cell = False 
     self.in_cell_value = False 
     self.in_cell_formula = False 

     self.columns = {} 
     self.rowNum = None 
     self.colType = None 
     self.s_attr = None 
     self.data = None 

     self.dateformat = None 
     self.skip_empty_lines = False 

     self.filehandle = filehandle 
     self.workbook = workbook 
     self.sharedStrings = sharedString.strings 
     self.styles = styles 

    def set_dateformat(self, dateformat): 
     self.dateformat = dateformat 

    def set_skip_empty_lines(self, skip): 
     self.skip_empty_lines = skip 

    def to_csv(self, writer): 
     self.writer = writer 
     self.parser = xml.parsers.expat.ParserCreate() 
     self.parser.CharacterDataHandler = self.handleCharData 
     self.parser.StartElementHandler = self.handleStartElement 
     self.parser.EndElementHandler = self.handleEndElement 
     self.parser.ParseFile(self.filehandle) 

    def handleCharData(self, data): 
     if self.in_cell_value: 
      self.collected_string+= data 
      self.data = self.collected_string 
      if self.colType == "s": # shared string 
       self.data = self.sharedStrings[int(self.data)] 
      elif self.colType == "b": # boolean 
       self.data = (int(data) == 1 and "TRUE") or (int(data) == 0 and "FALSE") or data 
      elif self.s_attr: 
       s = int(self.s_attr) 

       # get cell format 
       format = None 
       xfs_numfmt = self.styles.cellXfs[s] 
       if xfs_numfmt in self.styles.numFmts: 
        format = self.styles.numFmts[xfs_numfmt] 
       elif xfs_numfmt in STANDARD_FORMATS: 
        format = STANDARD_FORMATS[xfs_numfmt] 
       # get format type 
       if format and format in FORMATS: 
        format_type = FORMATS[format] 
        try: 
         if format_type == 'date': # date/time 
          if self.workbook.date1904: 
           date = datetime.datetime(1904, 1, 1) + datetime.timedelta(float(self.data)) 
          else: 
           date = datetime.datetime(1899, 12, 30) + datetime.timedelta(float(self.data)) 
          if self.dateformat: 
           # str(dateformat) - python2.5 bug, see: http://bugs.python.org/issue2782 
           self.data = date.strftime(str(self.dateformat)) 
          else: 
           dateformat = format.replace("yyyy", "%Y").replace("yy", "%y"). \ 
            replace("hh:mm", "%H:%M").replace("h", "%H").replace("%H%H", "%H").replace("ss", "%S"). \ 
            replace("d", "%e").replace("%e%e", "%d"). \ 
            replace("mmmm", "%B").replace("mmm", "%b").replace(":mm", ":%M").replace("m", "%m").replace("%m%m", "%m"). \ 
            replace("am/pm", "%p") 
           self.data = date.strftime(str(dateformat)).strip() 
         elif format_type == 'time': # time 
          self.data = str(float(self.data) * 24*60*60) 
         elif format_type == 'float' and ('E' in self.data or 'e' in self.data): 
          self.data = ("%f" %(float(self.data))).rstrip('0').rstrip('.') 
        except (ValueError, OverflowError): 
         # invalid date format 
         pass 
     # does not support it 
     #elif self.in_cell_formula: 
     # self.formula = data 

    def handleStartElement(self, name, attrs): 
     if self.in_row and name == 'c': 
      self.colType = attrs.get("t") 
      self.s_attr = attrs.get("s") 
      cellId = attrs.get("r") 
      if cellId: 
       self.colNum = cellId[:len(cellId)-len(self.rowNum)] 
       self.colIndex = 0 
      else: 
       self.colIndex+= 1 
      #self.formula = None 
      self.data = "" 
      self.in_cell = True 
     elif self.in_cell and (name == 'v' or name == 'is'): 
      self.in_cell_value = True 
      self.collected_string = "" 
     #elif self.in_cell and name == 'f': 
     # self.in_cell_formula = True 
     elif self.in_sheet and name == 'row' and 'r' in attrs: 
      self.rowNum = attrs['r'] 
      self.in_row = True 
      self.columns = {} 
      self.spans = None 
      if 'spans' in attrs: 
       self.spans = [int(i) for i in attrs['spans'].split(":")] 
     elif name == 'sheetData': 
      self.in_sheet = True 

    def handleEndElement(self, name): 
     if self.in_cell and name == 'v': 
      self.in_cell_value = False 
     #elif self.in_cell and name == 'f': 
     # self.in_cell_formula = False 
     elif self.in_cell and name == 'c': 
      t = 0 
      for i in self.colNum: t = t*26 + ord(i) - 64 
      self.columns[t - 1 + self.colIndex] = self.data 
      self.in_cell = False 
     if self.in_row and name == 'row': 
      if len(self.columns.keys()) > 0: 
       d = [""] * (max(self.columns.keys()) + 1) 
       for k in self.columns.keys(): 
        val = self.columns[k] 
        if not self.py3: 
         val = val.encode("utf-8") 
        d[k] = val 
       if self.spans: 
        l = self.spans[0] + self.spans[1] - 1 
        if len(d) < l: 
         d+= (l - len(d)) * [''] 
       # write line to csv 
       if not self.skip_empty_lines or d.count('') != len(d): 
        self.writer.writerow(d) 
      self.in_row = False 
     elif self.in_sheet and name == 'sheetData': 
      self.in_sheet = False 

def convert_recursive(path, sheetid, kwargs): 
    kwargs['cmd'] = False 
    for name in os.listdir(path): 
     fullpath = os.path.join(path, name) 
     if os.path.isdir(fullpath): 
      convert_recursive(fullpath, kwargs) 
     else: 
      if fullpath.lower().endswith(".xlsx"): 
       outfilepath = fullpath[:-4] + 'csv' 
       print("Converting %s to %s" %(fullpath, outfilepath)) 
       try: 
        Xlsx2csv(fullpath, **kwargs).convert(outfilepath, sheetid) 
       except zipfile.BadZipfile: 
        print("File %s is not a zip file" %fullpath) 

if __name__ == "__main__": 
    if "ArgumentParser" in globals(): 
     parser = ArgumentParser(description = "xlsx to csv convertor") 
     parser.add_argument('infile', metavar='xlsxfile', help="xlsx file path") 
     parser.add_argument('outfile', metavar='outfile', nargs='?', help="output csv file path") 
     parser.add_argument('-v', '--version', action='version', version='%(prog)s') 
     argparser = True 
    else: 
     parser = OptionParser(usage = "%prog [options] infile [outfile]", version=__version__) 
     parser.add_argument = parser.add_option 
     argparser = False 

    parser.add_argument("-a", "--all", dest="all", default=False, action="store_true", 
     help="export all sheets") 
    parser.add_argument("-d", "--delimiter", dest="delimiter", default=",", 
     help="delimiter - csv columns delimiter, 'tab' or 'x09' for tab (comma is default)") 
    parser.add_argument("-f", "--dateformat", dest="dateformat", 
     help="override date/time format (ex. %%Y/%%m/%%d)") 
    parser.add_argument("-i", "--ignoreempty", dest="skip_empty_lines", default=False, action="store_true", 
     help="skip empty lines") 
    parser.add_argument("-e", "--escape", dest='escape_strings', default=False, action="store_true", 
     help="Escape \\r\\n\\t characters") 
    parser.add_argument("-p", "--sheetdelimiter", dest="sheetdelimiter", default="--------", 
     help="sheets delimiter used to separate sheets, pass '' if you don't want delimiters (default '--------')") 
    parser.add_argument("-s", "--sheet", dest="sheetid", default=1, type=int, 
     help="sheet no to convert (0 for all sheets)") 

    if argparser: 
     options = parser.parse_args() 
    else: 
     (options, args) = parser.parse_args() 
     if len(args) < 1: 
      parser.print_usage() 
      sys.stderr.write("error: too few arguments" + os.linesep) 
      sys.exit(1) 
     options.infile = args[0] 
     options.outfile = len(args) > 1 and args[1] or None 

    if len(options.delimiter) == 1: 
     delimiter = options.delimiter 
    elif options.delimiter == 'tab': 
     delimiter = '\t' 
    elif options.delimiter == 'comma': 
     delimiter = ',' 
    elif options.delimiter[0] == 'x': 
     delimiter = chr(int(options.delimiter[1:])) 
    else: 
     raise XlsxException("Invalid delimiter") 

    kwargs = { 
     'delimiter' : delimiter, 
     'sheetdelimiter' : options.sheetdelimiter, 
     'dateformat' : options.dateformat, 
     'skip_empty_lines' : options.skip_empty_lines, 
     'escape_strings' : options.escape_strings, 
     'cmd' : True 
    } 
    sheetid = options.sheetid 
    if options.all: 
     sheetid = 0 

    if os.path.isdir(options.infile):mysq 
     convert_recursive(options.infile, sheetid, kwargs) 
    else: 
     xlsx2csv = Xlsx2csv(options.infile, **kwargs) 
     outfile = options.outfile or sys.stdout 
     xlsx2csv.convert(outfile, sheetid) 

Чтобы импортировать результат в MySQL, используйте: java -cp jython.jar org.python.util.jython xlsx2csv [xlsx-file] [csv-output-file]; mysqlimport --local db1 [csv-output-file]. Надеюсь, это поможет...

Смежные вопросы