在5月一号晚上,我参加了华中杯数学建模。对于数据处理,我用到了下面代码,希望能帮助到大家。
myDivision.py文件,用于对附件一的数据进行划分:
import pandas as pd #分析excel表格
import matplotlib.pyplot as plt # matplotlib中有很多可用的模块,我们使用pyplot模块
from numpy import *#读取excel表格并返回日期
def readExcel():df = pd.read_excel('附件1.xlsx',sheet_name='销量数据')return set(df['货号']) #按货号分#return set(df['日期']) # 按日期分#按货号进行分开
def changeGoods(x):df2 = pd.read_excel('附件1.xlsx',sheet_name='销量数据')#按货号分for i in range(len(x)):k = df2[df2['货号'] == x[i]]k.to_csv('huowu//{name}.csv'.format(name = x[i]), sep=',', header=True, index=True)#按日期进行分开
def changeDay(x):df2 = pd.read_excel('附件1.xlsx',sheet_name='销量数据')#按日期分for i in range(len(x)):k = df2[df2['日期'] == x[i]]# my_data = x[i][0:4] + x[i][5:7] + x[i][8:10]k.to_csv('data//{name}.csv'.format(name = x[i]), sep=',', header=True, index=True)#把每个日期和货物总量算出来
def summaryAll(x):sum = 0df = pd.read_csv(x)for m in range(len(df['销售件数'])):sum += df['销售件数'][m]return sum#创建表格
def createExcel(day,data):my_day = []for i in range(len(day)):# my_day.append(day[i][0:4] + '/' + day[i][4:6] + '/' +day[i][6:8])my_day.append(day[i][0:4] + day[i][4:6] + day[i][6:8])df1 = pd.DataFrame({'日期': my_day ,'总销量': data})df1.to_excel('每个日期的总销量二.xlsx', sheet_name='每天的总销量', startcol=0, index=False)def main():this = readExcel() #读取excel表格并返回日期my_list = list(this)#changeDay(my_list) #按日期进行分开changeGoods(my_list) # 按货号进行分开my_data = []for i in range(len(my_list)) :my_data.append(str(my_list[i][0:4]+my_list[i][5:7]+my_list[i][8:10]))my_data.sort()my_data_sum = []for j in range(len(my_data)):a = summaryAll('day/{my_day}.csv'.format(my_day = my_data[j]))my_data_sum.append(a)print(my_data)print(my_data_sum)data = []for k in range(len(my_data)):data.append(k)my_data_sum2 = []for n in range(len(my_data_sum)):my_data_sum2.append(math.log(my_data_sum[n]))print(my_data_sum2)createExcel(my_data, my_data_sum)if __name__ == '__main__':main()
myDivision2.py用于对每月的货号总销量进行汇总:
import pandas as pd #分析excel表格#读取excel表格并返回产品
def readExcelProdect():# df = pd.read_excel('附件一.xlsx',sheet_name='销量数据')df = pd.read_excel('附件1.xlsx', sheet_name='销量数据')return set(df['货号']) #按货物分#读取excel表格并返回日期
def readExcelDay():# df = pd.read_excel('附件一.xlsx',sheet_name='销量数据')df = pd.read_excel('附件1.xlsx', sheet_name='销量数据')return set(df['日期']) # 按日期分#生成汇总表
def createExcel(x,y):dict = {}dict[x[0]] = yfor i in range(1,len(x)):w = []for j in range(0,len(y)):w.append('')dict[x[i]] = wdf = pd.DataFrame(dict)df.to_excel('汇总.xlsx')#处理每一种货物的表格
def createData(x,day,goods):every_month = []for i in range(0,len(x['月份'])):every_month.append(str(x['月份'][i]))month = list(set(every_month))month.sort()all_num = []for i in range(len(month)):sum = 0my = x[x['月份'] == int(month[i])]n = list(my['销售件数'])for j in range(len(n)):sum += n[j]all_num.append(sum)# print(month)# print(all_num)#进行补零all_month = dayall_xiao_shou = []#print(day)n1 = 0for m in range(len(day)):if day[m] == month[0]:n1 = m #每个货物的月份开始日期n2 = n1 + len(month) - 1 #每个货物的二月份结束日期for n in range(len(day)):if n < n1 or n > n2 :all_xiao_shou.append(0)else:all_xiao_shou.append(all_num[n-n1])# print(day)print(all_xiao_shou)def main():this1 = readExcelProdect() # 读取excel表格并返回货号my_goods = list(this1)my_goods_new = []for i in range(len(my_goods)):my_goods_new.append(my_goods[i][2:7])my_goods_new.sort()this_goods = []for j in range(len(my_goods_new)):this_goods.append('SS'+my_goods_new[j])#print(this_goods)#print(len(this_goods))this2 = readExcelDay() # 读取excel表格并返回日期my_day = list(this2)my_day_new = []for i in range(len(my_day)):this_day = str(my_day[i])my_day_new.append(this_day[0:6])my_day_new = list(set(my_day_new))my_day_new.sort()# print(my_day_new)# createExcel(my_day_new, this_goods)df = pd.read_csv('huowu/SS61146.csv')createData(df, my_day_new, this_goods)for j in range(len(this_goods)):df = pd.read_csv('huowu/{n}.csv'.format(n = this_goods[j]))# print(this_goods[j])createData(df,my_day_new,this_goods)if __name__ == '__main__':main()
处理延期比数据并生成频率直方图,并把每个区间赋值为1-10以内数据,自动生成txt文件,方便matlib处理:
yanQiBi1.py
import pandas as pd
# 柱形图-折线图
from pyecharts import Bar, Line, Overlap
import numpy as np
import matplotlib.pyplot as plt#读取按季度分的表格并统计每个区间的个数
def readExcel(x):yy = []a1 = [];a11 = [];a12 = [];a13 = []a2 = [];a3 = [];a4 = [];a5 = [];a6 = []for i in range(len(x['延期比'])):# if x['延期比'][i] >= 0 and x['延期比'][i] < 0.01:# a1.append(x['延期比'][i])#0-0.01# if x['延期比'][i] == 0:# # a11.append(x['延期比'][i])# a11.append(1)if x['延期比'][i] > 0 and x['延期比'][i] < 0.001:# a12.append(x['延期比'][i])a12.append(1)elif x['延期比'][i] >= 0.001 and x['延期比'][i] < 0.01:# a13.append(x['延期比'][i])a13.append(2)elif x['延期比'][i] >= 0.01 and x['延期比'][i] < 0.02:# a2.append(x['延期比'][i])a2.append(3)elif x['延期比'][i] >= 0.02 and x['延期比'][i] < 0.1:# a3.append(x['延期比'][i])a3.append(4)elif x['延期比'][i] >= 0.1 and x['延期比'][i] < 0.2:# a4.append(x['延期比'][i])a4.append(5)elif x['延期比'][i] >= 0.2 and x['延期比'][i] < 0.35:# a5.append(x['延期比'][i])a5.append(6)elif x['延期比'][i] >= 0.35 and x['延期比'][i] <= 0.5:# a6.append(x['延期比'][i])a6.append(7)# yy.append(len(a11))yy.append(len(a12))yy.append(len(a13))yy.append(len(a2))yy.append(len(a3))yy.append(len(a4))yy.append(len(a5))yy.append(len(a6))return yy#生成matlab需要数据
def createMatl(myData):print(myData)my_list = []for i in range(myData[0]):my_list.append(1)for i in range(myData[1]):my_list.append(2)for i in range(myData[2]):my_list.append(3)for i in range(myData[3]):my_list.append(4)for i in range(myData[4]):my_list.append(5)for i in range(myData[5]):my_list.append(6)for i in range(myData[6]):my_list.append(7)print(my_list)return my_list# with open('all_data.txt','w') as f:# f.write(str(my_list))def createChart(my_list):xx = [1,2,3,4,5,6,7]bar = Bar("柱形图-折线图")bar.add('bar', xx, my_list)line = Line()line.add('line', xx, my_list)overlap = Overlap()overlap.add(bar)overlap.add(line)overlap.show_config()overlap.render(path='第二题延期比.html')def main():df = pd.read_excel('附件二.xlsx',sheet_name='汇总')list = readExcel(df)#createChart(list)#createMatl(list)list = createMatl(list)# 求均值arr_mean = np.mean(list)# 求方差arr_var = np.var(list)# 求标准差arr_std = np.std(list)print(arr_mean)print(arr_var)print(arr_std)# plt.style.use('seaborn-white')# # 最基本的频次直方图命令# plt.hist(list)# plt.show()if __name__ == '__main__':main()
yanQiBi2.py
import pandas as pd
# 柱形图-折线图
from pyecharts import Bar, Line, Overlap#读取按季度分的表格并统计每个区间的个数
def readExcel(x):yy = []a1 = [];a2 = [];a3 = [];a4 = [];a5 = []a6 = [];a7 = [];a8 = [];a9 = [];a10 = []for i in range(len(x['延期比'])):if x['延期比'][i] >= 0 and x['延期比'][i] < 0.1:# a1.append(x['延期比'][i])a1.append(1)elif x['延期比'][i] >= 0.1 and x['延期比'][i] < 0.2:# a2.append(x['延期比'][i])a2.append(2)elif x['延期比'][i] >= 0.2 and x['延期比'][i] < 0.3:#a3.append(x['延期比'][i])a3.append(3)elif x['延期比'][i] >= 0.3 and x['延期比'][i] < 0.4:# a4.append(x['延期比'][i])a4.append(4)elif x['延期比'][i] >= 0.4 and x['延期比'][i] < 0.5:# a5.append(x['延期比'][i])a5.append(5)elif x['延期比'][i] >= 0.5 and x['延期比'][i] < 0.6:# a6.append(x['延期比'][i])a6.append(6)elif x['延期比'][i] >= 0.6 and x['延期比'][i] < 0.7:# a7.append(x['延期比'][i])elif x['延期比'][i] >= 0.7 and x['延期比'][i] < 0.8:a8.append(x['延期比'][i])elif x['延期比'][i] >= 0.8 and x['延期比'][i] < 0.9:a9.append(x['延期比'][i])elif x['延期比'][i] >= 0.9 and x['延期比'][i] <= 1:a10.append(x['延期比'][i])yy.append(len(a1))yy.append(len(a2))yy.append(len(a3))yy.append(len(a4))yy.append(len(a5))yy.append(len(a6))yy.append(len(a7))yy.append(len(a8))yy.append(len(a9))yy.append(len(a10))print(yy)return yydef createChart(my_list):xx = ['0-0.01','0.1-0.2','0.2-0.3','0.3-0.4','0.4-0.5','0.5-0.6','0.6-0.7','0.7-0.8','0.8-0.9','0.9-1']bar = Bar("柱形图-折线图")bar.add('bar', xx, my_list)line = Line()line.add('line', xx, my_list)overlap = Overlap()overlap.add(bar)overlap.add(line)overlap.show_config()overlap.render(path='第三季度.html')def main():df = pd.read_excel('按季度分.xlsx',sheet_name='第三季度')list = readExcel(df)createChart(list)if __name__ == '__main__':main()
还有上新量的值类似:
import pandas as pd
# 柱形图-折线图
from pyecharts import Bar, Line, Overlap
import numpy as np
import matplotlib.pyplot as plt#统计每个区间的个数
def readExcel(x):shang_xin = []for i in range(len(x['上新日销量'])):shang_xin.append(x['上新日销量'][i])shang_xin.sort()jian_ge = (max(shang_xin) - min(shang_xin))/10yy = []a1 = [];a2 = [];a3 = [];a4 = []a5 = [];a6 = [];a7 = [];a8 = []for i in range(len(x['上新日销量'])):if x['上新日销量'][i] >= shang_xin[0] and x['上新日销量'][i] < shang_xin[0] + jian_ge/5:a1.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge/5 and x['上新日销量'][i] < shang_xin[0] + jian_ge/5*2:a2.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge/5*2 and x['上新日销量'][i] < shang_xin[0] + jian_ge/5*3:a3.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge/5*3 and x['上新日销量'][i] < shang_xin[0] + jian_ge:a4.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge and x['上新日销量'][i] < shang_xin[0] + jian_ge*2/3*2:a5.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge*2/3*2 and x['上新日销量'][i] < shang_xin[0] + jian_ge*2:a6.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge*2 and x['上新日销量'][i] < shang_xin[0] + jian_ge*3:a7.append(x['上新日销量'][i])elif x['上新日销量'][i] >= shang_xin[0] + jian_ge*3 and x['上新日销量'][i] <= shang_xin[0] + jian_ge*10:a8.append(x['上新日销量'][i])yy.append(len(a1))yy.append(len(a2))yy.append(len(a3))yy.append(len(a4))yy.append(len(a5))yy.append(len(a6))yy.append(len(a7))yy.append(len(a8))return yy#生成matlab需要数据
def createMatl(myData):print(myData)my_list = []for i in range(myData[0]):my_list.append(1)for i in range(myData[1]):my_list.append(2)for i in range(myData[2]):my_list.append(3)for i in range(myData[3]):my_list.append(4)for i in range(myData[4]):my_list.append(5)for i in range(myData[5]):my_list.append(6)for i in range(myData[6]):my_list.append(7)for i in range(myData[7]):my_list.append(8)#print(my_list)with open('第二题上新量.txt','w') as f:f.write(str(my_list))return my_list#生成图表
def createChart(my_list):xx = ['294-806','806-1319','1319-1832','1832-2857','2857-3712','3712-5421','5421-7984','7984-25930']bar = Bar("柱形图-折线图")bar.add('bar', xx, my_list)line = Line()line.add('line', xx, my_list)overlap = Overlap()overlap.add(bar)overlap.add(line)overlap.show_config()overlap.render(path='第二题上新量未赋值区间.html')def main():df = pd.read_excel('附件二.xlsx',sheet_name='汇总')list = readExcel(df) #统计每个区间的个数#createChart(list) #生成图表#createMatl(list)list = createMatl(list)# 求均值arr_mean = np.mean(list)# 求方差arr_var = np.var(list)# 求标准差arr_std = np.std(list)print(arr_mean)print(arr_var)print(arr_std)# plt.style.use('seaborn-white')# # 最基本的频次直方图命令# plt.hist(list)# plt.show()if __name__ == '__main__':main()
一些效果如下:
其他的数据类似,祝福大家有参加建模的同学能够马到成功,如果有需求的话,请联系qq:1657264184。题目的话,在建模完后会上传到资源下载区,有想研究的可以下载,地址为:https://download.csdn.net/download/itxiaoangzai/11158744