前言
由于需, 需要导入上万行数据, 重新巩固excel导入的问题, 目前主要使用得到:
apache POI(以下简称POI)原生的导入- 一个常用的工具, hutool的ExcelUtil(依赖POI的封装)
- 阿里easy excel(依赖POI的封装)
POI比较原生, 需要自己封装方法和可操作性比较强hutool对POI进行日常的封装, 上手简单easy excel对POI做了最优化, 可以读取大数据POI和hutool, 都是使用sax模式可以快速的读取数据, 本质是把excel转换为xml就行读取easy excel本身做了优化为一行一行读取que: 根据类型某行单元格的颜色, 判断这条记录的状态
自己上网找了很多资料, hutool和easy excel都没有现成的, 只能使用原生的POI, 代码使用hutool+POI读取, 但是读取速度较慢, 每次1000多条时, 读取速度比较适中, 也没做后续速度上的优化.
依赖, easy默认导poi入依赖, hutool默认没有导入POI依赖
<properties>
<easyexcel.version>3.0.5easyexcel.version>
<hutool.all.version>5.4.0hutool.all.version>
properties>
<dependency>
<groupId>com.alibabagroupId>
<artifactId>easyexcelartifactId>
<version>${easyexcel.version}version>
dependency>
<dependency>
<groupId>cn.hutoolgroupId>
<artifactId>hutool-allartifactId>
<version>${hutool.all.version}version>
dependency>
public class POIExcelUtil {
/**
* 读取excel数据
* @param path 路径
* @param sheetIndex sheet序号, 从0开始
* @param startRowIndex 开始行, 从0开始
* @param endRowIndex 结束行, 从0开始
* @return List>
*/
public static List<List<Object>> read(String path, int sheetIndex, int startRowIndex, int endRowIndex) throws IOException {
// 相当于一个excel个文件
Workbook workbook = WorkbookUtil.createBook(path);
// 获取第sheetIndex个表单
sheet = workbook.getSheetAt(sheetIndex);
// 读取起始行(包含)
startRowIndex = Math.max(startRowIndex, sheet.getFirstRowNum());
// 读取结束行(包含)
endRowIndex = Math.min(endRowIndex, sheet.getLastRowNum());
resultList = new ArrayList<>();
List<Object> rowList;
for (int i = startRowIndex; i <= endRowIndex; i++) {
rowList = readRow(i);
if (CollUtil.isNotEmpty(rowList)) {
if (null == rowList) {
rowList = new ArrayList<>(0);
}
resultList.add(rowList);
}
}
workbook.close();
// System.out.println(resultList);
return resultList;
}
/**
* 读取每行单元格数据
* @param index 单元格序号, 从0开始
* @return 该行数据
*/
private static List<Object> readRow(int index) {
// 获取第index行数据
Row row = sheet.getRow(index);
if (null == row) {
return new ArrayList<>(0);
}
final short rowLength = row.getLastCellNum();
if (rowLength < 0) {
return ListUtil.empty();
}
final int size = Math.min(Short.MAX_VALUE + 1, rowLength);
final List<Object> cellValues = new ArrayList<>(size);
Object cellValue;
boolean isAllNull = true;
// 循环读取数据
for (int i = 0; i < size; i++) {
cellValue = CellUtil.getCellValue(row.getCell(i), true);
isAllNull &= StrUtil.isEmptyIfStr(cellValue);
// System.out.println(cellValue);
cellValues.add(cellValue);
}
if (isAllNull) {
// 如果每个元素都为空,则定义为空行
return ListUtil.empty();
}
// xls 03版
// HSSFColor hssfColor = (HSSFColor) cellStyle.getFillForegroundColorColor();
// String color = hssfColor.getHexString();
// System.out.println(color);
// xlsx 07版
//获取单元格背景颜色
try {
Cell cell = row.getCell(rowLength-1);
CellStyle cellStyle = cell.getCellStyle();
Optional.ofNullable(cellStyle.getFillForegroundColorColor()).ifPresent(color -> {
XSSFColor xssfColor = (XSSFColor)color ;
byte[] bytes = xssfColor.getRGB();
// 获取单元格背景颜色, 为rgb颜色
cellValues.add(bytes[0]+","+bytes[1]+","+bytes[2]);
});
} catch (Exception e) {
}
return cellValues;
}
}
只是简答了解基本的原理, 主要是excel转化为xml, 对xml就行解析的过程
public class POIExcelUtil {
public static List<List<Object>> readBySax(String path, int sheetIndex, int startRowIndex, int endRowIndex) throws InvalidFormatException {
OPCPackage opcPackage = OPCPackage.open(path);
InputStream sheetInputStream = null;
SharedStringsTable sharedStringsTable = null;
try {
final XSSFReader xssfReader = new XSSFReader(opcPackage);
// 获取共享样式表
try {
stylesTable = xssfReader.getStylesTable();
} catch (Exception e) {
//ignore
throw new RuntimeException("获取共享样式表异常");
}
// 获取共享字符串表
sharedStringsTable = xssfReader.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sharedStringsTable);
// To look up the Sheet Name / Sheet Order / rID,
// you need to process the core Workbook stream.
// Normally it's of the form rId# or rSheet#
sheetInputStream = xssfReader.getSheet(RID_PREFIX+(sheetIndex+1));
InputSource sheetSource = new InputSource(sheetInputStream);
parser.parse(sheetSource);
} catch (RuntimeException e) {
throw e;
} catch (Exception e) {
throw new POIException(e);
} finally {
IoUtil.close(sheetInputStream);
IoUtil.close(opcPackage);
}
return resultList;
}
/**
* ClassName: SheetHandler
* Description: sheet处理类
* Author: sloth
* Date: 2018-02-26
*/
private static class SheetHandler extends DefaultHandler {
/**
* Field logger: 日志
*/
private static Logger logger = LoggerFactory.getLogger(SheetHandler.class);
/**
* Field sst: 共享字符串表对象
*/
private SharedStringsTable sst;
/**
* Field lastContents: 单元格内容
*/
private String lastContents;
/**
* Field nextIsString: 是否是字符串
*/
private boolean nextIsString;
/**
* Field nextIsDate: 是否是日期
*/
private boolean nextIsDate;
/**
* Field mapList: 读取Excel数据集(该变量可去除,因为集合中只有一个map)
*/
private ArrayList<HashMap<String, String>> mapList;
/**
* Field map: 键值对{单元格列名,单元格值}
*/
private HashMap<String, String> map;
/**
* Field key: 单元格坐标
*/
private String key;
/**
* Field value: 单元格值
*/
private String value;
/**
* Field index: 样式index
*/
private int index;
/**
* Description: 构造函数初始化
* @param sst 共享字符串表对象
*/
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
this.mapList = new ArrayList<HashMap<String, String>>();
this.map = new HashMap<String, String>();
}
/**
* Title: startElement
* Description:
* @param uri uri
* @param localName localName
* @param name XML标签名
* @param attributes XML标签对象
* @throws SAXException SAX异常
* @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
// c => cell
if ("c".equals(name)) {
// Print the cell reference
this.key = attributes.getValue("r");
this.value = "";
System.out.print(attributes.getValue("r") + " - ");
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && "s".equals(cellType)) {
this.nextIsString = true;
} else {
/** 单元格是日期格式时c标签中s属性的值是数字 **/
cellType = attributes.getValue("s");
System.out.println(cellType);
this.nextIsString = false;
}
/** 判断是否是日期格式 **/
if (cellType != null && Pattern.compile("^[-\\+]?[\\d]*$").matcher(cellType).matches()) {
this.index = Integer.parseInt(cellType);
this.nextIsDate = true;
} else {
this.nextIsDate = false;
}
}
// Clear contents cache
this.lastContents = "";
}
/**
* Title: endElement
* Description:
* @param uri uri
* @param localName localName
* @param name XML标签名
* @throws SAXException SAX异常
* @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
*/
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (this.nextIsString) {
int idx = Integer.parseInt(this.lastContents);
this.lastContents = new XSSFRichTextString(this.sst.getEntryAt(idx)).toString();
this.nextIsString = false;
}
if (this.nextIsDate && !"".equals(this.lastContents)) {
XSSFCellStyle style = stylesTable.getStyleAt(this.index);
short formatIndex = style.getDataFormat();
String formatString = style.getDataFormatString();
if (formatString.contains("m/d/yy")) {
formatString = "yyyy-MM-dd hh:mm:ss";
}
DataFormatter formatter = new DataFormatter();
this.lastContents = formatter.formatRawCellContents(Double.parseDouble(this.lastContents), formatIndex,
formatString);
System.out.println(this.lastContents);
this.nextIsDate = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if ("v".equals(name)) {
System.out.println(this.lastContents);
this.value = this.lastContents;
} else if ("c".equals(name)) {
this.map.put(this.key.replaceAll("\\d+", ""), this.value);
} else if ("row".equals(name)) {
String nowRow = this.key.replaceAll(this.key.replaceAll("\\d+", ""), "");
if (!"1".equals(nowRow)) {
this.mapList.add(this.map);
logger.info(String.valueOf(this.mapList));
insert(nowRow);
}
/** 清空存储集 **/
this.mapList.clear();
this.map.clear();
}
}
/**
* Description: 插入数据
* @param nowRow 插入数据所在行(用于检查Excel哪一行数据插入报错)
*/
private void insert(String nowRow) {
/** 迭代数据,通过对应列,获取数据库字段和值,拼接SQL **/
List<Object> list = new ArrayList<>();
HashMap<String, String> map = mapList.get(0);
map.forEach((K,V)-> list.add(V));
resultList.add(list);
/** 迭代数据,通过对应列,获取数据库字段和值,拼接SQL **/
}
/**
* Title: characters
* Description:
* @param ch 字符数组
* @param start 起始位
* @param length 长度
* @throws SAXException SAX异常
* @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
*/
public void characters(char[] ch, int start, int length) throws SAXException {
this.lastContents += new String(ch, start, length);
}
}
}
官网有详细的讲解, 可以参考, 以下简答给出一个示例
ExcelReader reader = ExcelUtil.getReader(file.getInputStream(), examineeExcelReq.getSheetIndex());
// 根据设置头行, 匹配类型中属性
reader.addHeaderAlias("序号", "id");
reader.addHeaderAlias("姓名", "studentName");
reader.addHeaderAlias("身份证号", "cardNumber");
reader.addHeaderAlias("等级", "certTypeName");
reader.addHeaderAlias("类别", "aircraftTypeName");
reader.addHeaderAlias("级别", "levelName");
reader.addHeaderAlias("理论次数", "theoryCount");
reader.addHeaderAlias("理论日期", "theoryDate");
reader.addHeaderAlias("理论通过", "theoryPass");
reader.addHeaderAlias("理论成绩", "theoryScore");
reader.addHeaderAlias("实践次数", "practiceCount");
reader.addHeaderAlias("实践日期", "practiceDate");
reader.addHeaderAlias("综合问答", "comprehensivePass");
reader.addHeaderAlias("飞行", "flightPass");
reader.addHeaderAlias("地面站", "groundPass");
reader.addHeaderAlias("委任代表", "commissionNames");
reader.addHeaderAlias("备注", "note");
List list =
reader.read(examineeExcelReq.getHeaderRowIndex(), examineeExcelReq.getStartRowIndex(), ExamineeExcelBo.class);
easy excel需要实现AnalysisEventListener类
public class PlanListener extends AnalysisEventListener {
private List planList = new ArrayList<>();
@Override
public void invoke(@Valid PlanBO siteBO, AnalysisContext analysisContext) {
// 读取每一行数据, 加入集合
planList.add(siteBO);
}
@Transactional(rollbackFor = Exception.class)
@Override
public void doAfterAllAnalysed(AnalysisContext analysisContext) {
//读取完成, 进行业务处理
}
public List getPlan() {
return planList;
}
}