python+requests+pandas实现数据对比,输出excel指定模板结果数据

雷阳洪

2022-03-08 17:49 IP属地: 吉林

教程介绍:

该教程的主要校验思路：根据 news_id 获取新闻标题和新闻内容，调用接口获取企业数据（实际结果）；
再根据 news_id 获取 csv 中符合条件的数据，得到企业数据（预期结果）；将两者进行比较，输出不相等的数据（excel 输出、日志输出）。

注意! 公司项目不同,仅供参考,重在思路理解

执行脚本 test_match_company.py

# -*- coding: utf-8 -*-
import json
import os

import pandas as pd
import pytest
import requests
from jsonpath import jsonpath

from util.data.xlwt_tool import write_excel
from util.report.logger_tool import Logger


class TestMatchCompany():
    """Compare companies returned by the match_company API with a CSV baseline.

    For every news row the test posts the title and content to the algorithm
    endpoint (actual result), looks up the rows with the same news id in the
    label CSV (expected result), and records every mismatch in the log and
    in an Excel report.
    """

    # The news file is loaded once at class-definition time; each row is
    # read below as [news_id, title, content].
    real_path = os.path.split(os.path.realpath(__file__))[0]
    news_path = real_path + os.sep + "../data/news_content.txt"
    news = pd.read_csv(news_path, sep='^')
    news1 = news.values.tolist()

    def setup_class(self):
        """Build the endpoint URL, load the expected labels and set up logging."""
        algo_name = "match_company"
        # host = "IP地址"
        host = "ip地址"
        port = "端口号"
        # match_company_url
        self.url = "http://" + host + ':' + str(port) + '/algorithm/process/' + algo_name
        real_path = os.path.split(os.path.realpath(__file__))[0]
        labels_path = real_path + os.sep + "../data/news_company_label.csv"
        self.labels_expected = pd.read_csv(labels_path, sep=';')
        # self.baseline_cols = ['chinesename','emotionindicator','stockcode']
        self.baseline_cols = ['stockcode', 'companyid']
        self.logging = Logger('../logs/{}.log'.format(algo_name), level='error')
        # self.logging = Logger('../logs/{}.log'.format(algo_name), level='info')

    def test_company_labels(self):
        """Check every news row against the baseline and export the mismatches."""
        excel_data_list = []
        for news_row in self.news1:
            # news_id = str(news_row[0])
            news_id = news_row[0]
            title = news_row[1]
            content = news_row[2]

            # Fetch the actual result from the match_company algorithm API.
            # extr_method defaults to 0 on the server and must be sent as 1;
            # there is no plan to change that default.
            parameter1 = {'title': title, 'content': content, 'extr_method': 1, 'use_skip': 0}
            payload1 = {'parameter': json.dumps(parameter1, ensure_ascii=False)}
            response1 = requests.post(self.url, data=payload1).json()
            json_data = jsonpath(response1, "$..com")
            # jsonpath() returns False (not an empty list) when nothing
            # matches, so guard before indexing; a null "com" node is also
            # treated as "no companies".
            companies = (json_data[0] if json_data else None) or []
            shijijieguo = []
            for company in companies:
                code = jsonpath(company, "$..code")
                comcode = jsonpath(company, "$..comcode")
                shijijieguo.append([code[0], comcode[0]])
            shijijieguo.sort()

            # Select the expected rows for this news id from the label CSV.
            expected = self.labels_expected
            baseline_result = expected[expected['newsid'] == news_id][self.baseline_cols]
            data_expect = baseline_result.values.tolist()
            for expect_row in data_expect:
                # The baseline uses 'csf' as a placeholder stock code; it is
                # normalised to '' before comparing with the API result.
                if expect_row[0] == 'csf':
                    expect_row[0] = ''
            data_expect.sort()

            # Classify the outcome and record every mismatch.
            report_row = [str(news_id), str(data_expect), str(shijijieguo)]
            if not data_expect and not shijijieguo:
                # excel_data_list.append(["csv和接口都没查到:"+str(news_id), str(data_expect), str(shijijieguo)])
                self.logging.debug(
                    "接口和csv文件都没有查到企业新闻数据,不做对比 news_id:{} 预期结果为空:{} 接口实际结果为空:{}".format(
                        news_id, data_expect, shijijieguo))
            elif not data_expect:
                excel_data_list.append(report_row)
                self.logging.error(
                    "对比不一致 news_id:{} 预期结果为空:{} 接口实际结果不为空:{}".format(
                        news_id, data_expect, shijijieguo))
            elif not shijijieguo:
                excel_data_list.append(report_row)
                # Message corrected: in this branch the expected result is
                # non-empty and the API result is empty.
                self.logging.error(
                    "对比不一致 news_id:{} 预期结果不为空:{} 接口实际结果为空:{}".format(
                        news_id, data_expect, shijijieguo))
            elif data_expect != shijijieguo:
                excel_data_list.append(report_row)
                self.logging.error(
                    "对比不一致 news_id:{} 预期结果:{} 实际结果:{}".format(
                        news_id, data_expect, shijijieguo))
            else:
                # excel_data_list.append(["一致:"+str(news_id), str(data_expect), str(shijijieguo)])
                self.logging.info(
                    "对比一致 news_id:{} 预期结果:{} 实际结果:{}".format(
                        news_id, data_expect, shijijieguo))

        # Export the mismatches to Excel; the file name carries their count.
        write_excel("../logs/match_company_error_{}.xls".format(len(excel_data_list)),
                    ['news_id', '预期结果', '实际结果'], excel_data_list)


# ===== write_excel.py: result output tool =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : Leiyh
# @File    : pandas_tool.py
# NOTE(review): the test script imports this function from
# util.data.xlwt_tool, so the file name given above may be out of date.
import xlwt


def write_excel(file_name, data_title, data_list, encoding='utf-8'):
    """Write a header row and a two-dimensional list to an .xls file.

    :param file_name: path of the workbook to save
    :param data_title: titles written to the first row of the sheet
    :param data_list: two-dimensional list of data; each row must have at
        least ``len(data_title)`` items
    :param encoding: encoding passed to ``xlwt.Workbook``
    :return: None
    """
    # Create the workbook (note the capital W in Workbook).
    workbook = xlwt.Workbook(encoding=encoding)
    # Add a sheet named 'sheet1'.
    sheet1 = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    # Header row.
    for col, title in enumerate(data_title):
        sheet1.write(0, col, str(title))
    # Data rows start at row 1; only the columns that have a title are written.
    column_count = len(data_title)
    for row_number, row in enumerate(data_list, start=1):
        for col in range(column_count):
            sheet1.write(row_number, col, str(row[col]))
    # Save the workbook to file_name.
    workbook.save(file_name)
    print('创建execel完成！')


if __name__ == '__main__':
    # data = get_test_case("C:/softwareData/PycharmProjects/s00-wuling/documents/user/注册接口sign_up.xlsx")
    # print(data[0])
    # print(data[1])
    # write_excel("match_company.xls", ['news_id', '预期结果', '实际结果'],
    #             [['35942860', str(['', 'ICN5025197980', 1]), str(['', 'ICN5025197980', 1])]])
    write_excel("match_company.xls", ['news_id', '预期结果', '实际结果'],
                [['35942860', "['', 'ICN5025197980', 1]", "['', 'ICN5025197980', 1]"]])


# ===== logger_tool.py: log output tool =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : Leiyh
# @File    : logger_tool.py
import logging


class Logger(object):
    """Thin wrapper that attaches a file handler to the root logger."""

    # Maps the level names accepted by __init__ to logging level constants.
    level_relations = {
        'notset': logging.NOTSET,
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL,
    }

    def __init__(self, filename, filemode='w', level='notset',
                 format='%(asctime)s - %(levelname)s: %(message)s'):
        """Configure the root logger to write to *filename*.

        :param filename: path of the log file
        :param filemode: mode used to open the log file ('w' truncates it)
        :param level: one of the keys of ``level_relations``
        :param format: format string handed to ``logging.Formatter``
        """
        logger = logging.getLogger()
        logger.setLevel(level=self.level_relations.get(level))
        # The messages contain Chinese text, so the encoding is set
        # explicitly instead of relying on the platform default.
        filehandle = logging.FileHandler(filename, filemode, encoding='utf-8')
        formatter = logging.Formatter(format)
        filehandle.setFormatter(formatter)
        logger.addHandler(filehandle)

    def debug(self, msg):
        """Log *msg* at DEBUG level."""
        logging.debug(msg)

    def info(self, msg):
        """Log *msg* at INFO level."""
        logging.info(msg)

    def warning(self, msg):
        """Log *msg* at WARNING level."""
        logging.warning(msg)

    def error(self, msg):
        """Log *msg* at ERROR level."""
        logging.error(msg)

    def critical(self, msg):
        """Log *msg* at CRITICAL level."""
        logging.critical(msg)


if __name__ == '__main__':
    logger = Logger('all.log', level='info')
    logger.info("Start print log")
    logger.debug("Do something")
    logger.warning("Something maybe fail")
    logger.error("error print log")

最后编辑于：2023-05-26 17:34