当前位置：百木园 > 野生技术 > 正文

Python 基于xml.etree.ElementTree实现XML对比

2022-12-12 分类：野生技术 阅读(300) 评论(0)

测试环境

Python 3.6

Win10

代码实现

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Module metadata: original author of the script.
__author__ = 'shouke'

import xml.etree.ElementTree as ET

def compare_xml_node_attributes(xml_node1, xml_node2):
    '''
    Compare the attributes of two elements and describe every difference.

    Neither element is modified: the attribute dicts are only read.

    @param: xml_node1 first element (anything exposing an ``attrib`` mapping)
    @param: xml_node2 second element
    @return: list of strings, one per differing attribute. Attributes of
             node 1 come first (in node 1 order), followed by attributes
             that exist only on node 2. An attribute that is absent on one
             side is shown with name and value '不存在' for that side.
    '''
    missing = '不存在'
    template = '结点1属性：{attrib1} 值：{value1}，结点2属性：{attrib2} 值：{value2}'

    node1_attributes = xml_node1.attrib
    node2_attributes = xml_node2.attrib

    result = []
    for name, value1 in node1_attributes.items():
        # Membership (not truthiness) decides presence, so an attribute with
        # an empty-string value is still treated as existing.
        if name not in node2_attributes:
            result.append(template.format(attrib1=name, value1=value1,
                                          attrib2=missing, value2=missing))
            continue
        value2 = node2_attributes[name]
        if value1 != value2:
            result.append(template.format(attrib1=name, value1=value1,
                                          attrib2=name, value2=value2))

    # Whatever node 2 has that node 1 lacks.
    for name, value2 in node2_attributes.items():
        if name not in node1_attributes:
            result.append(template.format(attrib1=missing, value1=missing,
                                          attrib2=name, value2=value2))
    return result


def compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath):
    '''
    Compare the direct children of two elements.

    Children are grouped by tag; within a tag group the i-th child of node 1
    is compared (recursively, via compare_xml_nodes) with the i-th child of
    node 2. Children left without a partner on either side are reported as
    surplus, using positional xpaths rooted at their own parent's xpath.

    @param: xml_node1 first parent element
    @param: xml_node2 second parent element
    @param: node1_xpath xpath of xml_node1, used as prefix for its children
    @param: node2_xpath xpath of xml_node2, used as prefix for its children
    @return: list of difference descriptions: recursive differences of the
             paired children first, then one entry listing children only
             xml1 has, then one entry listing children only xml2 has.
    '''
    def get_node_children(xml_node, node_xpath):
        # Group children by tag in document order and attach a 1-based
        # positional xpath (tag[n]) to each of them.
        grouped = {}
        for child in xml_node:
            siblings = grouped.setdefault(child.tag, [])
            siblings.append({'node': child,
                             'xpath': '%s/%s[%s]' % (node_xpath, child.tag, len(siblings) + 1)})
        return grouped

    result = []
    children_of_node1_dict = get_node_children(xml_node1, node1_xpath)
    children_of_node2_dict = get_node_children(xml_node2, node2_xpath)

    only_in_node1 = []
    only_in_node2 = []
    for child_tag, first_children in children_of_node1_dict.items():
        second_children = children_of_node2_dict.get(child_tag, [])

        # Positional pairing of same-tag siblings.
        for first_child, second_child in zip(first_children, second_children):
            result.extend(compare_xml_nodes(first_child['node'], second_child['node'],
                                            first_child['xpath'], second_child['xpath']))

        # zip() stops at the shorter list; whatever is left over on either
        # side has no counterpart and must be reported as surplus.
        paired = min(len(first_children), len(second_children))
        only_in_node1.extend(child['xpath'] for child in first_children[paired:])
        only_in_node2.extend(child['xpath'] for child in second_children[paired:])

    # Tags that occur under xml2 only.
    for child_tag, second_children in children_of_node2_dict.items():
        if child_tag not in children_of_node1_dict:
            only_in_node2.extend(child['xpath'] for child in second_children)

    if only_in_node1:
        result.append('子结点不一样：xml1结点(xpath：{xpath1})比xml2结点(xpath：{xpath2})多了以下子结点：\n{differences}'.format(
            xpath1=node1_xpath,
            xpath2=node2_xpath,
            differences='\n'.join(only_in_node1)))

    if only_in_node2:
        result.append('子结点不一样：xml1结点(xpath：{xpath1})比xml2结点(xpath：{xpath2})少了以下子结点：\n{differences}'.format(
            xpath1=node1_xpath,
            xpath2=node2_xpath,
            differences='\n'.join(only_in_node2)))
    return result


def compare_xml_nodes(xml_node1, xml_node2, node1_xpath='', node2_xpath=''):
    '''
    Compare two elements: tag, text, attributes and (recursively) children.

    @param: xml_node1 first element
    @param: xml_node2 second element
    @param: node1_xpath xpath used to identify xml_node1 in the messages
    @param: node2_xpath xpath used to identify xml_node2 in the messages
    @return: list of difference descriptions; empty when nothing differs.
             Order: tag, text, attributes, then children.
    '''
    result = []

    # Tag
    if xml_node1.tag != xml_node2.tag:
        result.append('标签不一样：xml1结点(xpath：{xpath1})：{tag1}，xml2结点(xpath：{xpath2})：{tag2}'.format(
            xpath1=node1_xpath,
            tag1=xml_node1.tag,
            xpath2=node2_xpath,
            tag2=xml_node2.tag))

    # Text. The keyword names must match the {text1}/{text2} placeholders,
    # otherwise str.format raises KeyError on the first text difference.
    if xml_node1.text != xml_node2.text:
        result.append('文本不一样：xml1结点(xpath：{xpath1})：{text1}，xml2结点(xpath：{xpath2})：{text2}'.format(
            xpath1=node1_xpath,
            text1=xml_node1.text or '',
            xpath2=node2_xpath,
            text2=xml_node2.text or ''))

    # Attributes: all attribute differences of this node go into one entry.
    attribute_differences = compare_xml_node_attributes(xml_node1, xml_node2)
    if attribute_differences:
        result.append('属性不一样：xml1结点(xpath：{xpath1})，xml2结点(xpath：{xpath2})：\n{differences}'.format(
            xpath1=node1_xpath,
            xpath2=node2_xpath,
            differences='\n'.join(attribute_differences)))

    # Children (this recurses back into compare_xml_nodes for paired children).
    result.extend(compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath))

    return result


def compare_xml_strs(xml1_str, xml2_str, mode=3):
    '''
    Parse two XML documents given as strings and compare them from the root.

    @param: xml1_str first XML document
    @param: xml2_str second XML document
    @param: mode comparison mode; reserved and currently unused. Repeated
            child elements are treated as an ordered list and compared
            position by position.
    @return: list of difference descriptions; empty when the documents match.
    '''
    # Strip surrounding whitespace before parsing, because an XML declaration
    # is only accepted at the very start of the document.
    root1 = ET.fromstring(xml1_str.strip())
    root2 = ET.fromstring(xml2_str.strip())

    return compare_xml_nodes(root1, root2, '/%s' % root1.tag, '/%s' % root2.tag)

测试运行

# Sample documents for the demo run below.
# xml_str1 and xml_str2 are identical.
xml_str1 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
    <country name="Liechtenstein">
        <rangk>1</rangk>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E" ></neighbor>
        <neighbor name="Switzerland" direction="W" ></neighbor>
    </country>
    <country name="Singpore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N" ></neighbor>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W" ></neighbor>
        <neighbor name="Colombia" direction="W" ></neighbor>
    </country>
</data>
'''
xml_str2 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
    <country name="Liechtenstein">
        <rangk>1</rangk>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E" ></neighbor>
        <neighbor name="Switzerland" direction="W" ></neighbor>
    </country>
    <country name="Singpore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N" ></neighbor>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W" ></neighbor>
        <neighbor name="Colombia" direction="W" ></neighbor>
    </country>
</data>
'''

# xml_str3 and xml_str4 differ: missing/extra children, a changed attribute
# value, and a third <class> element that only xml_str3 has.
xml_str3 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
    <class name="computer">
        <rangk>1</rangk>
        <year>unknow</year>
        <addr>sz</addr>
        <book name="java programming" price="10" ></book>
        <book name="python programming" price="10" ></book>
    </class>
    <class name="philosophy">
        <rangk>2</rangk>
        <year>unknown</year>
        <book name="A little history of philosophy" price="15" ></book>
        <book name="contemporary introduction" price="15" ></book>
    </class>
    <class name="history">
        <rangk>3</rangk>
        <year>unknown</year>
        <addr>other addr</addr>
        <book name="The South China Sea" price="10" ></book>
        <book name="Chinese Among Others" price="10" ></book>
    </class>
</data>
'''

xml_str4 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
    <class name="computer">
        <year>unknow</year>
        <addr>sz</addr>
        <book name="java programming" price="10" ></book>
        <book name="python programming" price="10" ></book>
    </class>
    <class name="philosophy">
        <year>unknown</year>
        <addr>other addr</addr>
        <book name="A little history of philosophy" price="15" ></book>
        <book name="contemporary introduction" price="16" ></book>
    </class>
</data>
'''


if __name__ == '__main__':
    # Demo: compare an identical pair first, then a pair with known differences.
    for label1, label2, first_xml, second_xml in (
            ('xml1', 'xml2', xml_str1, xml_str2),
            ('xml3', 'xml4', xml_str3, xml_str4)):
        res_list = compare_xml_strs(first_xml, second_xml)
        if res_list:
            print('%s和%s不一样:\n%s' % (label1, label2, '\n'.join(res_list)))
        else:
            print('%s和%s一样' % (label1, label2))

运行结果

xml1和xml2一样
xml3和xml4不一样:
子结点不一样：xml1结点(xpath：/data/class[1])比xml2结点(xpath：/data/class[1])多了以下子结点：
/data/class[1]/rangk[1]
属性不一样：xml1结点(xpath：/data/class[2]/book[2])，xml2结点(xpath：/data/class[2]/book[2])：
结点1属性：price 值：15，结点2属性：price 值：16
子结点不一样：xml1结点(xpath：/data/class[2])比xml2结点(xpath：/data/class[2])多了以下子结点：
/data/class[2]/rangk[1]
子结点不一样：xml1结点(xpath：/data/class[2])比xml2结点(xpath：/data/class[2])少了以下子结点：
/data/class[2]/addr[1]

作者：授客

微信/QQ：1033553122

全国软件测试QQ交流群：7156436

Git地址：https://gitee.com/ishouke

友情提示：限于时间仓促，文中可能存在错误，欢迎指正、评论！

作者五行缺钱，如果觉得文章对您有帮助，请扫描下边的二维码打赏作者，金额随意，您的支持将是我继续创作的源动力，打赏后如有任何疑问，请联系我!!!

           微信打赏
支付宝打赏                  全国软件测试交流QQ群
Python 基于xml.etree.ElementTree实现XML对比

来源：https://www.cnblogs.com/shouke/p/16975021.html
本站部分图文来源于网络，如有侵权请联系删除。

未经允许不得转载：百木园 » Python 基于xml.etree.ElementTree实现XML对比

标签：python

相关推荐

暂无文章

正在加载中...