大家好,又见面了,我是你们的朋友全栈君。如果您正在找激活码,请点击查看最新教程,关注关注公众号 “全栈程序员社区” 获取激活教程,可能之前旧版本教程已经失效.最新Idea2022.1教程亲测有效,一键激活。
Jetbrains全系列IDE使用 1年只要46元 售后保障 童叟无欺
由于项目组中原来的文件使用的XML格式作为配置,扩展性很好,但是阅读起来不是很直观,编辑起来也是不很方便,特别一些规则的二维表,个人觉得使用EXCEL文件会方便很多。所以为了方便阅读与编辑这些规则的二维表,花了一些时间写了一个Python脚本,以实现将XML文件转为Excel文件。
这里支持XML文件转为一个Sheet或者多个Sheet:
- 如果第二层所有标签都相同则会转为一个Sheet,所有第二层的标签都会作为行数据
- 如果第二层的标签有多种,则会把第二层的不同标签作为不同的Sheet,第三层的数据作为行数据
其它情况未作测试。
注意:脚本会自动根据所有同层标签计算所有字段以及所有子标签及其重复数,并作为列的扩展,如果XML的嵌套过深或者太复杂,可能会导致扩展后的列数超过EXCEL所支持的最大列数(16384列),导致失败。
附上源码以共享,当然,如果有更好的Idea进行改进欢迎留言。
#
# XML文件转换成Excel文件
#
# 版本:1.0
#
# 作者:Witton Bell
# E_Mail:witton@163.com
#
#
# 功能描述:
#
# 自动检测源文件编码是否为UTF8,如果不是,则使用GB18030进行解码,并转换成UTF8编码的文件,再进行XML源文件的读取
#
# 自动提取XML中的标签作为Excel的标题列,支持嵌套与重复,但是所有列不能超过EXCEL支持的最大列(16384列)
#
# 仅支持XML的第一层只有一个标签,
# 第二层标签可以只有一个标签(允许重复),此时第一层标签作为Excel的WorkSheet,第二层标签及其嵌套作为一行数据)
# 第二层标签也可以有多个标签(允许重复),此时第二层标签作为Excel的WorkSheet,第三层标签及其嵌套作为一行数据)
#
# 由于openpyxl默认为Excel的xlsx格式,所以本脚本的所有Excel文件格式都为xlsx
#
#
import openpyxl
from openpyxl.styles import Alignment
import xml.dom.minidom
from xml.dom.minidom import Document
from openpyxl.styles import Border, Side
import chardet
import os
class XMLNode:
def __init__(self):
self.name = ""
self.properties = []
self.child = []
self.layer = 0
self.index = 0
self.parent = None
self.node_info = None
class XMLNodeInfo:
def __init__(self, node):
self.map_properties = {
}
self.map_child = {
}
self.max_index = 0
self.layer = node.layer
self.name = node.name
self.register(node)
def register(self, node):
for k in node.properties:
if self.map_properties.get(k[0]) is None:
self.map_properties[k[0]] = self.map_properties.__len__()
for ch in node.child:
v = self.map_child.get(ch.name)
if v is None:
self.map_child[ch.name] = ch.node_info
if node.index > self.max_index:
self.max_index = node.index
class XMLReader:
# is_auto_convert2utf8:是否自动转换为UTF8编码
# is_convert_to_original_file:如果转换为UTF8,是否覆盖原文件
def __init__(self, file_path, is_auto_convert2utf8, is_convert_to_original_file):
self.__root = []
self.__map_node_info = {
}
if is_auto_convert2utf8:
is_tmp_file, tmp_file_path = self.__convert_to_utf8(file_path)
fd = xml.dom.minidom.parse(tmp_file_path)
if is_tmp_file:
if is_convert_to_original_file:
os.remove(file_path)
os.rename(tmp_file_path, file_path)
else:
os.remove(tmp_file_path)
else:
fd = xml.dom.minidom.parse(file_path)
index = 0
for child in fd.childNodes:
if child.nodeType != xml.dom.Node.ELEMENT_NODE:
continue
self.__read_node(child, self.__root, index, None)
index += 1
def get_root_node(self):
return self.__root
@staticmethod
def __convert_to_utf8(file_path):
fd = open(file_path, "rb")
fd.seek(0, 2)
size = fd.tell()
if size > 1024 * 1024:
size = 1024 * 1024
fd.seek(0, 0)
text = fd.read(size)
ret = chardet.detect(text)
if ret['encoding'].lower().find("utf-8") != -1:
return False, file_path
tmp_file = file_path + ".tmp"
file = open(tmp_file, "w", encoding="utf-8", newline="\n")
fd.seek(0, 0)
line = fd.readline()
while line.__len__() > 0:
file.write(line.decode("gb18030"))
line = fd.readline()
file.close()
fd.close()
return True, tmp_file
@staticmethod
def __get_attrib(node, rc):
if node._attrs is None:
return
for key in node._attrs:
v = node._attrs[key]._value
rc.append([key, v])
def __read_node(self, node, root, index, parent, layer=1):
xml_node = XMLNode()
xml_node.name = node.nodeName
xml_node.layer = layer
xml_node.index = index
xml_node.parent = parent
self.__get_attrib(node, xml_node.properties)
i = 0
for child in node.childNodes:
if child.nodeType != xml.dom.Node.ELEMENT_NODE:
continue
self.__read_node(child, xml_node.child, i, xml_node, layer + 1)
i += 1
root.append(xml_node)
self.__register(xml_node)
def __register(self, node):
key = node.name + str(node.layer)
nd = self.__map_node_info.get(key)
if nd is None:
nd = XMLNodeInfo(node)
node.node_info = nd
self.__map_node_info[key] = nd
else:
nd.register(node)
node.node_info = nd
class XMLWriter:
def __init__(self, xml_file_path, xml_node_list):
doc = Document()
for node in xml_node_list:
ele = self.__write_node(node, doc)
doc.appendChild(ele)
f = open(xml_file_path, 'w', encoding='utf-8')
doc.writexml(f, indent='\t', addindent='\t', encoding='utf-8', newl='\n')
def __write_node(self, node, doc):
ele = doc.createElement(node.name)
for prop in node.properties:
ele.setAttribute(prop[0], prop[1])
for child in node.childs:
ret = self.__write_node(child, doc)
ele.appendChild(ret)
return ele
class XmlToXls:
# read_from_xml_file_path:XML源文件完整路径
# save_to_xls_file_path:保存转换后的Excel文件完整路径
# is_auto_convert2utf8:是否自动转换为UTF8编码
# is_convert_to_original_file:如果转换为UTF8,是否覆盖原文件
# is_merge_head:如果表头有多行,是否合并层属关系的表头
# is_border_cell:是否添加单元格的边框
# is_alignment_center:单元格是否居中对齐
def __init__(self, read_from_xml_file_path, save_to_xls_file_path,
is_auto_convert2utf8=True, is_convert_to_original_file=False,
is_merge_head=False, is_border_cell=False, is_alignment_center=True):
try:
self.__is_merge_head = is_merge_head
if is_alignment_center:
self.__alignment = Alignment(horizontal='center', vertical='center')
else:
self.__alignment = None
if is_border_cell:
side = Side(border_style='thin', color='000000')
self.__border = Border(side, side, side, side, side)
else:
self.__border = None
wb = openpyxl.Workbook()
wb.encoding = 'utf-8'
for sh in wb.worksheets:
wb.remove_sheet(sh)
reader = XMLReader(read_from_xml_file_path,
is_auto_convert2utf8,
is_convert_to_original_file)
self.__write(reader.get_root_node(), wb)
wb.save(save_to_xls_file_path)
wb.close()
except Exception as e:
print(e)
def __write(self, xml_node, wb):
self.__map_field = {
}
for node in xml_node:
if node.node_info.map_child.__len__() == 1:
self.__start_layer = 1
self.__write_sheet(node, wb)
else:
self.__start_layer = 2
for child in node.child:
self.__write_sheet(child, wb)
def __write_sheet(self, node, wb):
sh = wb.create_sheet(node.name)
self.__write_head(node.node_info, sh)
self.__write_data(node, sh)
def __write_head(self, node, sh):
for key in node.map_child:
col = 1
child = node.map_child[key]
for k in child.map_properties:
c = child.map_properties[k]
self.__write_cell(sh, child.layer - self.__start_layer, c + col, k)
col += child.map_properties.__len__()
self.__write_head_ext(child, sh, col)
break
def __write_head_ext(self, node, sh, col):
for key in node.map_child:
child = node.map_child[key]
num = child.map_properties.__len__()
for i in range(0, child.max_index + 1):
if col > 16384:
raise Exception("超过EXCEL最大列数(16384列)限制,转换失败")
old_col = col
self.__write_cell(sh, child.layer - self.__start_layer - 1, col, child.name)
for k in child.map_properties:
c = child.map_properties[k]
self.__write_cell(sh, child.layer - self.__start_layer, c + col, k)
col += num
col = self.__write_head_ext(child, sh, col)
if self.__is_merge_head:
merge_row = child.layer - self.__start_layer - 1
sh.merge_cells(start_row=merge_row, end_row=merge_row,
start_column=old_col, end_column=col - 1)
return col
def __write_data(self, node, sh):
row = sh.max_row + 1
sh.freeze_panes = sh.cell(row, 1)
for child in node.child:
col = 1
self.__write_data_ext(child, sh, row, col)
row += 1
def __write_data_ext(self, node, sh, row, col):
m = node.node_info.map_properties
max_prop_num = m.__len__()
for prop in node.properties:
c = m[prop[0]]
self.__write_cell(sh, row, c + col, prop[1])
col += max_prop_num
map_child = {
}
for child in node.child:
if map_child.get(child.name) is None:
map_child[child.name] = 1
else:
map_child[child.name] += 1
col = self.__write_data_ext(child, sh, row, col)
for key in node.node_info.map_child:
child = node.node_info.map_child[key]
all_count = child.max_index + 1
count = map_child.get(key)
if count is not None:
all_count -= count
for i in range(0, all_count):
col += child.map_properties.__len__()
return col
def __write_cell(self, sh, row, col, value):
if value.isdigit():
cell = sh.cell(row, col, int(value))
else:
cell = sh.cell(row, col, value)
cell.alignment = self.__alignment
cell.border = self.__border
由于使用的XML解析器不支持值中有大于(>),小于(<)这些特殊符号,所以如果值中有这些符号的XML文件会解析失败,报错:
not well-formed (invalid token)
比如下面的XML文件就会报上面的错:
<?xml version='1.0' encoding='UTF-8'?>
<test>
<testData value="<测试数据>"/>
</test>
也不支持没有根节点的XML文件:
比如:
<?xml version='1.0' encoding='UTF-8'?>
<A Value="A1">
<AA value="a"/>
</A>
<B Value="B1">
<BB value="b"/>
</B>
<C Value="C1">
<CC value="c"/>
</C>
会报错:
junk after document element
C++使用的tinyxml是可以正常解析大于小于等特殊符号的,网上有一个pytinyxml2开源项目,让python可以使用tinyxml进行解析。
安装pytinyxml2之前需要先安装swig,Windows下可以下载:swigwin-4.0.1.zip,这是已经编译好的版本,解压出来后添加路径到PATH环境变量即可。
非Windows可以下载swig-4.0.1.tar.gz进行编译安装,也可以直接:
yum install swig
安装好swig后,在pytinyxml2源码目录中执行:
python setup.py install
不能使用
pip install pytinyxml2
进行安装,我遇到有报错:
running bdist_wheel
running build
running build_py
creating build
creating build/lib.linux-x86_64-3.5
copying pytinyxml2.py -> build/lib.linux-x86_64-3.5
running build_ext
building '_pytinyxml2' extension
swigging pytinyxml2.i to pytinyxml2_wrap.cpp
swig -python -c++ -o pytinyxml2_wrap.cpp pytinyxml2.i
pytinyxml2.i:5: Error: Unable to find 'tinyxml2.h'
error: command 'swig' failed with exit status 1
查看pytinyxml2的包可以看到,里面缺少tinyxml2.h
项目中原来的程序只能读取XML格式的文档,我们为了方便编辑与查阅,所以把XML转为Excel,编辑完成后,还需要把Excel转为原来的XML,所以实现了XLS转XML,并对之前的代码作了部分修改,附上源码:
# -*- coding: UTF-8 -*-
#
# XML文件与Excel文件互转
#
# 版本:1.1
#
# 作者:Witton Bell
# E_Mail:witton@163.com
#
#
# 功能描述:
#
# XmlToXls:
#
# 自动检测源文件编码是否为UTF8,如果不是,则使用GB18030进行解码,并转换成UTF8编码的文件,再进行XML源文件的读取
#
# 自动提取XML中的标签作为Excel的标题列,支持嵌套与重复,但是所有列不能超过EXCEL支持的最大列(16384列)
#
# 使用XMLReader仅支持XML的只有一个根标签(标准XML格式),使用TinyXMLReader支持有多个根标签
# 第二层标签可以只有一个标签(允许重复),此时第一层标签作为Excel的WorkSheet,第二层标签及其嵌套作为一行数据)
# 第二层标签也可以有多个标签(允许重复),此时第二层标签作为Excel的WorkSheet,第三层标签及其嵌套作为一行数据)
#
# 由于openpyxl默认为Excel的xlsx格式,所以本脚本的所有Excel文件格式都为xlsx
#
# XlsToXml:
# 同样XMLWriter仅支持XML只有一个根标签(标准XML格式),使用TinyXMLWriter支持有多个根标签
# Excel文件需要有层次分明的表头,并且需要冻结表头
#
import openpyxl
from openpyxl.styles import Alignment
from openpyxl.styles import Border, Side
from openpyxl.comments import Comment
import openpyxl.utils as xls_util
import chardet
import os
import xml.dom.minidom
import copy
class XMLNode:
def __init__(self):
self.name = ""
self.properties = []
self.child = []
self.layer = 0
self.index = 0
self.parent = None
self.node_info = None
class XMLNodeInfo:
def __init__(self, node):
self.map_properties = {
}
self.map_child = {
}
self.max_index = 0
self.layer = node.layer
self.name = node.name
self.parent = node.parent
self.register(node)
def register(self, node):
for k in node.properties:
if self.map_properties.get(k[0]) is None:
self.map_properties[k[0]] = self.map_properties.__len__()
for ch in node.child:
v = self.map_child.get(ch.name)
if v is None:
self.map_child[ch.name] = ch.node_info
if node.index > self.max_index:
self.max_index = node.index
class _XMLReaderBase:
def __init__(self):
self._root = []
self._map_node_info = {
}
def __calc_node_key(self, node):
key = ""
if node.parent is not None:
key = self.__calc_node_key(node.parent)
return "%s_%s" % (key, node.name)
def _register(self, node):
key = self.__calc_node_key(node)
nd = self._map_node_info.get(key)
if nd is None:
nd = XMLNodeInfo(node)
node.node_info = nd
self._map_node_info[key] = nd
else:
nd.register(node)
node.node_info = nd
@staticmethod
def _convert_to_utf8(file_path):
fd = open(file_path, "rb")
fd.seek(0, 2)
size = fd.tell()
if size > 1024 * 1024:
size = 1024 * 1024
fd.seek(0, 0)
text = fd.read(size)
ret = chardet.detect(text)
if ret['encoding'].lower().find("utf-8") != -1:
return False, file_path
tmp_file = file_path + ".tmp"
file = open(tmp_file, "w", encoding="utf-8", newline="\n")
fd.seek(0, 0)
line = fd.readline()
while line.__len__() > 0:
file.write(line.decode("gb18030"))
line = fd.readline()
file.close()
fd.close()
return True, tmp_file
# 该类使用xml不能解析值中带特殊符号的文件,也不支持没有统一根节点的XML文件,建议使用TinyXMLReader
class XMLReader(_XMLReaderBase):
# is_auto_convert2utf8:是否自动转换为UTF8编码
# is_convert_to_original_file:如果转换为UTF8,是否覆盖原文件
def read(self, file_path, is_auto_convert2utf8=True, is_convert_to_original_file=False):
_XMLReaderBase.__init__(self)
if is_auto_convert2utf8:
is_tmp_file, tmp_file_path = self._convert_to_utf8(file_path)
fd = xml.dom.minidom.parse(tmp_file_path)
if is_tmp_file:
if is_convert_to_original_file:
os.remove(file_path)
os.rename(tmp_file_path, file_path)
else:
os.remove(tmp_file_path)
else:
fd = xml.dom.minidom.parse(file_path)
index = 0
for child in fd.childNodes:
if child.nodeType != xml.dom.Node.ELEMENT_NODE:
continue
self.__read_node(child, self._root, index, None)
index += 1
return self._root
@staticmethod
def __get_attrib(node, rc):
if node._attrs is None:
return
for key in node._attrs:
v = node._attrs[key]._value
rc.append([key, v])
def __read_node(self, node, root, index, parent, layer=1):
xml_node = XMLNode()
xml_node.name = node.nodeName
xml_node.layer = layer
xml_node.index = index
xml_node.parent = parent
self.__get_attrib(node, xml_node.properties)
i = 0
for child in node.childNodes:
if child.nodeType != xml.dom.Node.ELEMENT_NODE:
continue
self.__read_node(child, xml_node.child, i, xml_node, layer + 1)
i += 1
root.append(xml_node)
self._register(xml_node)
# 该类需要安装pytinyxml2,参见:https://blog.csdn.net/witton/article/details/100302498
class TinyXMLReader(_XMLReaderBase):
# is_auto_convert2utf8:是否自动转换为UTF8编码
# is_convert_to_original_file:如果转换为UTF8,是否覆盖原文件
def read(self, file_path, is_auto_convert2utf8=True, is_convert_to_original_file=False):
_XMLReaderBase.__init__(self)
import pytinyxml2
doc = pytinyxml2.XMLDocument()
if is_auto_convert2utf8:
is_tmp_file, tmp_file_path = self._convert_to_utf8(file_path)
doc.LoadFile(tmp_file_path)
if is_tmp_file:
if is_convert_to_original_file:
os.remove(file_path)
os.rename(tmp_file_path, file_path)
else:
os.remove(tmp_file_path)
else:
doc.LoadFile(file_path)
node = doc.RootElement()
index = 0
while node is not None:
self.__read_node(node, self._root, index, None)
node = node.NextSiblingElement()
index += 1
return self._root
@staticmethod
def __get_attrib(node, rc):
attrib = node.FirstAttribute()
while attrib is not None:
key = attrib.Name()
v = attrib.Value()
rc.append([key, v])
attrib = attrib.Next()
def __read_node(self, node, root, index, parent, layer=1):
xml_node = XMLNode()
xml_node.name = node.Value()
xml_node.layer = layer
xml_node.index = index
xml_node.parent = parent
self.__get_attrib(node, xml_node.properties)
i = 0
child = node.FirstChildElement()
while child is not None:
self.__read_node(child, xml_node.child, i, xml_node, layer + 1)
child = child.NextSiblingElement()
i += 1
root.append(xml_node)
self._register(xml_node)
class XMLWriter:
def __init__(self, xml_file_path, xml_node_list):
doc = xml.dom.minidom.Document()
for node in xml_node_list:
ele = self.__write_node(node, doc)
doc.appendChild(ele)
f = open(xml_file_path, 'w', encoding='utf-8')
doc.writexml(f, indent='\t', addindent='\t', encoding='utf-8', newl='\n')
f.close()
def __write_node(self, node, doc):
ele = doc.createElement(node.name)
for prop in node.properties:
ele.setAttribute(prop[0], prop[1])
for child in node.child:
ret = self.__write_node(child, doc)
ele.appendChild(ret)
return ele
class TinyXMLWriter:
# is_convert_spec_character: 是否转换特殊字符
def __init__(self, xml_file_path, xml_node_list, is_convert_spec_character=True):
import pytinyxml2
doc = pytinyxml2.XMLDocument(is_convert_spec_character)
decl = doc.NewDeclaration()
doc.LinkEndChild(decl)
for node in xml_node_list:
ele = self.__write_node(node, doc)
doc.LinkEndChild(ele)
doc.SaveFile(xml_file_path)
def __write_node(self, node, doc):
ele = doc.NewElement(node.name)
for prop in node.properties:
ele.SetAttribute(prop[0], prop[1])
for child in node.child:
ret = self.__write_node(child, doc)
ele.LinkEndChild(ret)
return ele
class XmlToXls:
def __init__(self):
self.__is_merge_head = False
self.__alignment_center = None
self.__border = None
# read_from_xml_file_path:XML源文件完整路径
# save_to_xls_file_path:保存转换后的Excel文件完整路径
# is_auto_convert2utf8:是否自动转换为UTF8编码
# is_convert_to_original_file:如果转换为UTF8,是否覆盖原文件
# is_merge_head:如果表头有多行,是否合并层属关系的表头
# is_border_cell:是否添加单元格的边框
# is_use_tiny_xml:是否使用tinyXML
def convert(self, read_from_xml_file_path, save_to_xls_file_path,
is_auto_convert2utf8=True, is_convert_to_original_file=False,
is_merge_head=False, is_border_cell=False, is_use_tiny_xml=True):
try:
self.__is_merge_head = is_merge_head
self.__alignment_center = Alignment(horizontal='center', vertical='center')
if is_border_cell:
side = Side(border_style='thin', color='000000')
self.__border = Border(side, side, side, side, side)
else:
self.__border = None
wb = openpyxl.Workbook()
wb.encoding = 'utf-8'
for sh in wb.worksheets:
wb.remove_sheet(sh)
if is_use_tiny_xml:
reader = TinyXMLReader()
else:
reader = XMLReader()
root = reader.read(read_from_xml_file_path,
is_auto_convert2utf8,
is_convert_to_original_file)
self.__write(root, wb)
wb.save(save_to_xls_file_path)
wb.close()
except Exception as e:
print(e)
# src_path_dir:XML源目录完整路径
# dst_path_dir:保存转换后的Excel文件完整目录路径,如果为None或者为空,则直接转换在源文件应对目录下
# is_auto_convert2utf8:是否自动转换为UTF8编码
# is_convert_to_original_file:如果转换为UTF8,是否覆盖原文件
# is_merge_head:如果表头有多行,是否合并层属关系的表头
# is_border_cell:是否添加单元格的边框
# is_use_tiny_xml:是否使用tinyXML
def convert_dirs(self, src_path_dir, dst_path_dir=None,
is_auto_convert2utf8=True, is_convert_to_original_file=False,
is_merge_head=False, is_border_cell=False, is_use_tiny_xml=True):
if dst_path_dir is not None and dst_path_dir != "":
if not os.path.exists(dst_path_dir):
os.mkdir(dst_path_dir)
for root, dirs, files in os.walk(src_path_dir):
for name in files:
basename, ext = os.path.splitext(name)
if ext != ".xml":
continue
src = os.path.join(root, name)
target = basename + ".xlsx"
print("处理%s" % src)
if dst_path_dir is None or dst_path_dir == "":
dst = os.path.join(root, target)
else:
dst = os.path.join(dst_path_dir, target)
self.convert(src, dst, is_auto_convert2utf8, is_convert_to_original_file,
is_merge_head, is_border_cell, is_use_tiny_xml)
def __write(self, xml_node, wb):
self.__map_field = {
}
for node in xml_node:
if node.node_info.map_child.__len__() == 1:
self.__is_multi_sheet = False
self.__write_sheet(node, wb)
else:
self.__is_multi_sheet = True
for child in node.child:
self.__write_sheet(child, wb)
def __write_sheet(self, node, wb):
sh = wb.create_sheet(node.name)
self.__write_head(node, sh)
self.__write_data(node, sh)
def __write_head(self, node, sh):
node_info = node.node_info
if self.__is_multi_sheet:
self.__write_head_cell(sh, node_info.parent.layer, 1, node_info.parent.name)
self.__write_head_cell(sh, node_info.layer, 1, node_info.name)
comment_str = ""
for prop in node.properties:
comment_str += '%s="%s"\n' % (prop[0], prop[1])
if comment_str != "":
sh.cell(node_info.layer, 1).comment = Comment(comment_str, "", width=300)
elif not self.__is_multi_sheet:
self.__write_head_cell(sh, node_info.layer, 1, node_info.name)
child_name = None
for key in node_info.map_child:
col = 1
child = node_info.map_child[key]
child_name = child.name
for k in child.map_properties:
c = child.map_properties[k]
self.__write_head_cell(sh, child.layer, c + col, k)
col += child.map_properties.__len__()
self.__write_head_ext(child, sh, col)
break
if self.__is_multi_sheet:
row = 3
else:
row = 2
if child_name is not None:
sh.insert_rows(row)
self.__write_head_cell(sh, row, 1, child_name)
def __write_head_ext(self, node, sh, col):
for key in node.map_child:
child = node.map_child[key]
num = child.map_properties.__len__()
for i in range(0, child.max_index + 1):
if col > 16384:
raise Exception("超过EXCEL最大列数(16384列)限制,转换失败")
old_col = col
self.__write_head_cell(sh, child.layer - 1, col, child.name)
for k in child.map_properties:
c = child.map_properties[k]
self.__write_head_cell(sh, child.layer, c + col, k)
col += num
col = self.__write_head_ext(child, sh, col)
if self.__is_merge_head:
merge_row = child.layer - 1
sh.merge_cells(start_row=merge_row, end_row=merge_row,
start_column=old_col, end_column=col - 1)
return col
def __write_data(self, node, sh):
row = sh.max_row + 1
sh.freeze_panes = sh.cell(row, 1)
for child in node.child:
col = 1
self.__write_data_ext(child, sh, row, col)
row += 1
def __write_data_ext(self, node, sh, row, col):
m = node.node_info.map_properties
max_prop_num = m.__len__()
for prop in node.properties:
c = m[prop[0]]
self.__write_cell(sh, row, c + col, prop[1])
col += max_prop_num
map_child = {
}
for child in node.child:
if map_child.get(child.name) is None:
map_child[child.name] = 1
else:
map_child[child.name] += 1
col = self.__write_data_ext(child, sh, row, col)
for key in node.node_info.map_child:
child = node.node_info.map_child[key]
all_count = child.max_index + 1
count = map_child.get(key)
if count is not None:
all_count -= count
for i in range(0, all_count):
col += child.map_properties.__len__()
return col
def __write_head_cell(self, sh, row, col, value):
cell = sh.cell(row, col, value)
cell.border = self.__border
cell.alignment = self.__alignment_center
def __write_cell(self, sh, row, col, value):
if value.isdigit():
cell = sh.cell(row, col, int(value))
else:
cell = sh.cell(row, col, value)
cell.border = self.__border
class XlsToXml:
def __init__(self):
pass
@staticmethod
def __add_prop(map_field, col, value):
if map_field.__len__() == 0:
return
if map_field.get(col) is None:
# 找本节点
c = col - 1
while c >= 1:
if map_field.get(c) is not None:
node = map_field[c][0]
break
c -= 1
else:
node = map_field[col][0]
node.properties.append([value, col])
@staticmethod
def __add_node(map_field, row, col, value):
node = XMLNode()
node.name = value
if map_field.get(col) is not None:
node.parent = map_field[col][0]
else:
# 找父节点
c = col - 1
while c >= 1:
if map_field.get(c) is not None:
if row > map_field[c][1]:
node.parent = map_field[c][0]
break
c -= 1
if node.parent is not None:
node.parent.child.append(node)
node.layer = node.parent.layer + 1
else:
node.layer = 1
map_field[col] = [node, row]
return node
def __read_xls(self, file_path):
wb = openpyxl.load_workbook(file_path)
root_data = XMLNode()
is_multi_sheet = wb.worksheets.__len__() > 1
for sh in wb.worksheets:
max_row = sh.max_row + 1
max_col = sh.max_column + 1
if sh.freeze_panes is None:
raise Exception("文件[%s]表单[%s]的无冻结窗口,无法确定表头,转为XML失败" %
(os.path.basename(file_path), sh.title))
head_row, head_col = xls_util.coordinate_to_tuple(sh.freeze_panes)
if head_col != 1:
raise Exception("文件[%s]表单[%s]的冻结窗口列不为1,无法转为XML" % (os.path.basename(file_path), sh.title))
root = None
map_field = {
}
for col in range(1, max_col):
for row in range(1, head_row):
cell = sh.cell(row, col)
value = cell.value
comment = cell.comment
if value is None:
continue
next_row = row + 1
if next_row >= head_row:
self.__add_prop(map_field, col, value)
continue
next_row_value = sh.cell(next_row, col).value
if next_row_value is None:
self.__add_prop(map_field, col, value)
continue
node = self.__add_node(map_field, row, col, value)
if root is None:
root = node
if comment is None:
continue
comment = comment.text
lines = comment.splitlines()
for line in lines:
props = line.split('=')
kv = []
for prop in props:
prop = prop.replace('"', '')
kv.append(prop)
if kv.__len__() == 2:
node.properties.append(kv)
kv = []
root_data.name = root.name
root_data.layer = root.layer
if is_multi_sheet and root.child.__len__() > 0:
child_list = copy.deepcopy(root.child[0].child)
root.child[0].child = []
root_data.child.append(root.child[0])
root_data_child = root.child[0].child
else:
child_list = copy.deepcopy(root.child)
root_data_child = root_data.child
for row in range(head_row, max_row):
clone = copy.deepcopy(child_list)
for child in clone:
self.__read_node_data(child, sh, row)
root_data_child.append(child)
return root_data
def __read_node_data(self, node, sh, row):
prop_list = []
for prop in node.properties:
col = prop[1]
value = sh.cell(row, col).value
if value is not None:
prop_list.append([prop[0], value])
child_list = []
for child in node.child:
self.__read_node_data(child, sh, row)
if child.properties.__len__() > 0 or child.child.__len__() > 0:
child_list.append(child)
copy.copy(child)
node.properties = prop_list
node.child = child_list
def convert(self, src_file_path, dst_file_path, is_use_tiny_xml=True, is_convert_spec_character=False):
root = self.__read_xls(src_file_path)
if is_use_tiny_xml:
TinyXMLWriter(dst_file_path, [root], is_convert_spec_character)
else:
XMLWriter(dst_file_path, [root])
def convert_dirs(self, src_path_dir, dst_path_dir=None, is_use_tiny_xml=True, is_convert_spec_character=False):
for root, dirs, files in os.walk(src_path_dir):
for name in files:
basename, ext = os.path.splitext(name)
if ext != ".xls" and ext != ".xlsx":
continue
src = os.path.join(root, name)
target = basename + "1" + ".xml"
print("处理%s" % src)
if dst_path_dir is None or dst_path_dir == "":
dst = os.path.join(root, target)
else:
dst = os.path.join(dst_path_dir, target)
try:
self.convert(src, dst, is_use_tiny_xml, is_convert_spec_character)
except Exception as e:
print(e)
有人在问如何使用,给一个XML转Excel的示例,假如有一个test.xml文件:
<?xml version="1.0" encoding="UTF-8"?>
<Test>
<Data Type="1001" Desc = "这是描述">
<Condition Value="10" Name="名字1">
<AwardItem ItemId = "5" SubId = "10" Count = "10" />
</Condition>
<Condition Value="20" Name="名字2">
<AwardItem ItemId = "5" SubId = "100" Count = "50" />
</Condition>
<Condition Value="30" Name="名字3">
<AwardItem ItemId = "5" SubId = "1000" Count = "100" />
</Condition>
</Data>
<Data Type="1002" Desc = "这是描述">
<Condition Value="100" Name="名字10">
<AwardItem ItemId = "5" SubId = "10" Count = "10" />
</Condition>
<Condition Value="200" Name="名字20">
<AwardItem ItemId = "5" SubId = "100" Count = "50" />
</Condition>
<Condition Value="300" Name="名字30">
<AwardItem ItemId = "5" SubId = "1000" Count = "100" />
</Condition>
<Condition Value="400" Name="名字40">
<AwardItem ItemId = "5" SubId = "5000" Count = "200" />
</Condition>
</Data>
</Test>
我们写一个XML转XLSX的调用程序test.py
import sys
import xml2xls
import traceback
def main():
if sys.argv.__len__() < 3:
print("格式错误,格式:<命令> <XML源文件> <XLSX目标文件>")
return
c = xml2xls.XmlToXls()
try:
c.convert(sys.argv[1], sys.argv[2])
print("处理文件%s成功" % sys.argv[1])
except Exception as e:
print("处理文件%s失败, 异常:%s" % (sys.argv[1], e))
print(traceback.format_exc())
if __name__ == '__main__':
main()
在命令行执行:
python test.py test.xml test.xlsx
就会生成一个test.xlsx的Excel文件,使用Excel打开,结果如图:
从前面的XML可以看到,第一行只有3组Condition,第二行有4组Condition,所以最后按最大值4组来展示,第一行没第4组Condition,则为空。
同时,转换后的Excel表头是锁定状态,并且表头是有层次结构的,对应XML的层次。同样,如果要将一个Excel表转换成XML,也需要有锁定状态的且有层次结构的表头。
祝好!
发布者:全栈程序员-用户IM,转载请注明出处:https://javaforall.cn/171942.html原文链接:https://javaforall.cn
【正版授权,激活自己账号】: Jetbrains全家桶Ide使用,1年售后保障,每天仅需1毛
【官方授权 正版激活】: 官方授权 正版激活 支持Jetbrains家族下所有IDE 使用个人JB账号...