2021高教社杯全国大学生数学建模竞赛C题 Python代码演示

- 问题一
- - 1.1 根据附件 1，对 402 家供应商的供货特征进行量化分析
  - - 计算供货特征
    - 数据标准化
    - - 对正向指标归一化
      - 对负向指标归一化
  - 1.2 建立反映保障企业生产重要性的数学模型
  - - 熵权法
    - 熵权法-TOPSIS
    - AHP
  - 1.3 在此基础上确定 50 家最重要的供应商，并在论文中列表给出结果。
- 问题二
- - 2.1 参考问题 1，该企业应至少选择多少家供应商供应原材料才可能满足生产的需求？
  - - 遗传算法
    - 差异进化
    - 粒子群优化（PSO）

import pandas as pd
import warnings
warnings.filterwarnings("ignore")path = '/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/'
d1 = pd.read_excel((path + '附件1 近5年402家供应商的相关数据.xlsx'), sheet_name='企业的订货量（m³）')
d2 = pd.read_excel((path + '附件1 近5年402家供应商的相关数据.xlsx'), sheet_name='供应商的供货量（m³）')
d3 = pd.read_excel((path + '附件2 近5年8家转运商的相关数据.xlsx'))

问题一

1.1 根据附件 1，对 402 家供应商的供货特征进行量化分析

https://dxs.moe.gov.cn/zx/a/hd_sxjm_sxjmlw_2021qgdxssxjmjslwzs/211025/1734085.shtml
在这里插入图片描述

计算供货特征

供货次数（正向）

d2_sub = d2.iloc[:,2:]
supply_times = d2_sub.apply(lambda x: sum(x!=0), axis=1)

平均供货量（正向）

supply_quantity_mean = d2_sub.apply(lambda x: sum(x), axis=1) / supply_times

单次最大供货量（正向）

supply_max = d2_sub.apply(lambda x: max(x), axis=1)

供货稳定性（负向）

d1_sub = d1.iloc[:,2:]
d12_sub = d1_sub.subtract(d2_sub) ** 2
supply_stability = d12_sub.apply(lambda x: sum(x), axis=1) / supply_times

供货连续性

间隔次数（负向）

import numpy as npgap_times = [None] * d2_sub.shape[0]
for i in range(0,d2_sub.shape[0]):a = d2_sub.iloc[i,:] == 0gap_times[i] = (a&~np.r_[[False],a[:-1]]).sum()

平均间隔周数（负向）

gap_weeks_mean = [None] * d2_sub.shape[0]
for i in range(0,d2_sub.shape[0]):index = [0] + list(np.where(d2_sub.iloc[i,:] != 0)[0]) + [241]new = np.diff(index)gap_weeks_mean[i] = sum(new[np.where((new != 1) & (new != 0))])gap_weeks_mean = gap_weeks_mean / supply_times

平均连续供货周数（正向）

supply_weeks_mean = [None] * d2_sub.shape[0]
for i in range(0,d2_sub.shape[0]):index = np.where(d2_sub.iloc[i,:] != 0)[0]new = np.where(np.diff(index) == 1)[0]supply_weeks_mean[i] = len(new) * 2 - len(np.where(np.diff(new) == 1)[0])supply_weeks_mean = supply_weeks_mean / supply_times

合理供货比例（正向）

df = pd.DataFrame(None, columns=list(d2_sub.columns),index=list(d2_sub.index))for i in range(0,d2_sub.shape[0]):for j in range(0,d2_sub.shape[1]):if d1_sub.iloc[i,j] == 0:df.iloc[i,j] = 0if (d2_sub.iloc[i,j] > d1_sub.iloc[i,j] * 0.8) and (d2_sub.iloc[i,j] < d1_sub.iloc[i,j] * 1.2):df.iloc[i,j] = Trueelse:df.iloc[i,j] = Falsesupply_proportion = df.apply(lambda x: sum(x), axis=1) / supply_times

数据标准化

df = pd.DataFrame({'供货次数': supply_times,'平均供货量': supply_quantity_mean,'单次最大供货量': supply_max,'供货稳定性': supply_stability,'间隔次数': gap_times,'平均间隔周数': gap_weeks_mean,'平均连续供货周数': supply_weeks_mean,'合理供货比例': supply_proportion})df.shape

(402, 8)

对正向指标归一化

df_positive = df[['供货次数','平均供货量','单次最大供货量','平均连续供货周数','合理供货比例']]
df_positive_norm = df_positive.apply(lambda x: (x-min(x)) / (max(x)-min(x)), axis=0)
df_positive_norm.head()

	供货次数	平均供货量	单次最大供货量	平均连续供货周数	合理供货比例
0	0.100418	0.000328	0.000135	0.360000	0.360000
1	0.292887	0.000972	0.001785	0.577465	0.507042
2	0.794979	0.023157	0.010441	0.973822	0.727749
3	0.133891	0.000321	0.000189	0.636364	0.363636
4	0.443515	0.021727	0.003435	1.000000	0.859813

对负向指标归一化

df_negative = df[['供货稳定性','间隔次数','平均间隔周数']]
df_negative_norm = df_negative.apply(lambda x: (max(x)-x) / (max(x)-min(x)), axis=0)
df_negative_norm.head()

	供货稳定性	间隔次数	平均间隔周数
0	0.999999	0.809091	0.960863
1	1.000000	0.590909	0.987411
2	0.999988	0.863636	0.998601
3	0.999993	0.809091	0.971365
4	1.000000	0.945455	0.994567

# merge data
df_norm = pd.concat([df_positive_norm, df_negative_norm], axis=1, join='inner')

1.2 建立反映保障企业生产重要性的数学模型

https://www.bilibili.com/read/cv12741665/
https://github.com/Valdecy/pyDecision

熵权法

https://www.kaggle.com/code/alpayabbaszade/entropy-topsis

首先对供货连续性下的3个二级指标进行加权

supply_continuity = df_norm[['间隔次数','平均间隔周数','平均连续供货周数']]

#Normalize Decision matrix
def norm(X):return X/X.sum()supply_continuity_norm = norm(supply_continuity)

#Entropy Values
k = -(1/np.log(supply_continuity_norm.shape[0]))def entropy(X):return (X*np.log(X)).sum()*kentropy = entropy(supply_continuity_norm)#degree of differentiation
dod = 1 - entropy
w = dod/dod.sum()
weights = w.sort_values(ascending = False)
weights

平均连续供货周数    0.594747
间隔次数        0.384353
平均间隔周数      0.020900
dtype: float64

supply_continuity_weighted = supply_continuity['间隔次数']*weights.iloc[0] + supply_continuity['平均间隔周数']*weights.iloc[1] + supply_continuity['平均连续供货周数']*weights.iloc[2]
df_norm.drop(['间隔次数','平均间隔周数','平均连续供货周数'], axis=1, inplace=True)
df_norm['供货连续性'] = supply_continuity_weighted
df_norm.head(3)

	供货次数	平均供货量	单次最大供货量	合理供货比例	供货稳定性	供货连续性
0	0.100418	0.000328	0.000135	0.360000	0.999999	0.858039
1	0.292887	0.000972	0.001785	0.507042	1.000000	0.743025
2	0.794979	0.023157	0.010441	0.727749	0.999988	0.917813

对6个一级指标进行加权

#Normalize Decision matrix
def norm(X):return X/X.sum()df_norm_new = norm(df_norm)#Entropy Values
k = -(1/np.log(df_norm_new.shape[0]))def entropy(X):return (X*np.log(X)).sum()*kentropy = entropy(df_norm_new)#degree of differentiation
dod = 1 - entropy
w = dod/dod.sum()
weights_entropy = w.sort_values(ascending = False)
weights_entropy

单次最大供货量    0.496440
平均供货量      0.392450
供货次数       0.091647
合理供货比例     0.016205
供货连续性      0.002825
供货稳定性      0.000433
dtype: float64

熵权法-TOPSIS

https://www.kaggle.com/code/alpayabbaszade/entropy-topsis
https://blog.csdn.net/qq_42374697/article/details/105901229

def norm(X):return X/np.sqrt((X**2).sum())norm_matrix = norm(df_norm)
w_norm_matrix = norm_matrix*wV_plus = w_norm_matrix.apply(max)
V_minus = w_norm_matrix.apply(min)S_plus = np.sqrt(((w_norm_matrix - V_plus)**2).apply(sum, axis = 1))
S_minus = np.sqrt(((w_norm_matrix - V_minus)**2).apply(sum, axis = 1))
scores = S_minus/(S_plus + S_minus)

d2['综合得分'] = scores * 100
output = d2[['供应商ID','综合得分']]# sort by scores
output = output.sort_values('综合得分', ascending=False)
output.iloc[0:50,:].to_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_top50.csv')

output.iloc[0:50,:].head()

	供应商ID	综合得分
200	S201	87.972335
347	S348	57.756055
139	S140	52.993102
150	S151	44.951646
373	S374	41.858722

output.to_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_all.csv')

AHP

https://www.mindtools.com/a7y139c/the-analytic-hierarchy-process-ahp
https://github.com/PhilipGriffith/AHPy

df_norm.head()

	供货次数	平均供货量	单次最大供货量	合理供货比例	供货稳定性	供货连续性
0	0.100418	0.000328	0.000135	0.360000	0.999999	0.858039
1	0.292887	0.000972	0.001785	0.507042	1.000000	0.743025
2	0.794979	0.023157	0.010441	0.727749	0.999988	0.917813
3	0.133891	0.000321	0.000189	0.363636	0.999993	0.867852
4	0.443515	0.021727	0.003435	0.859813	1.000000	0.965471

import ahpy
comparisons = {('供货次数', '平均供货量'): 3, ('供货次数', '单次最大供货量'): 5, ('供货次数', '合理供货比例'): 5, ('供货次数', '供货稳定性'): 5, ('供货次数', '供货连续性'): 5, ('平均供货量', '单次最大供货量'): 5, ('平均供货量', '合理供货比例'): 3, ('平均供货量', '供货稳定性'): 3, ('平均供货量', '供货连续性'): 3,('单次最大供货量', '合理供货比例'): 1/3, ('单次最大供货量', '供货稳定性'): 1/3, ('单次最大供货量', '供货连续性'): 1/3,('合理供货比例', '供货稳定性'): 1, ('合理供货比例', '供货连续性'): 1,('供货稳定性', '供货连续性'): 1}
comparisons

{('供货次数', '平均供货量'): 3,('供货次数', '单次最大供货量'): 5,('供货次数', '合理供货比例'): 5,('供货次数', '供货稳定性'): 5,('供货次数', '供货连续性'): 5,('平均供货量', '单次最大供货量'): 5,('平均供货量', '合理供货比例'): 3,('平均供货量', '供货稳定性'): 3,('平均供货量', '供货连续性'): 3,('单次最大供货量', '合理供货比例'): 0.3333333333333333,('单次最大供货量', '供货稳定性'): 0.3333333333333333,('单次最大供货量', '供货连续性'): 0.3333333333333333,('合理供货比例', '供货稳定性'): 1,('合理供货比例', '供货连续性'): 1,('供货稳定性', '供货连续性'): 1}

cal = ahpy.Compare(name='Drinks', comparisons=comparisons, precision=3, random_index='saaty')
cal.target_weights

{'供货次数': 0.445,'平均供货量': 0.232,'合理供货比例': 0.093,'供货稳定性': 0.093,'供货连续性': 0.093,'单次最大供货量': 0.044}

cal.consistency_ratio

0.032

CR < 0.1, 可认为判断矩阵的一致性可以接受

1.3 在此基础上确定 50 家最重要的供应商，并在论文中列表给出结果。

将熵权法和AHP得到的权重进行平均，得到最终的指标权重，然后加权计算各供应商的得分

weights_ahp = pd.DataFrame.from_dict(cal.target_weights, orient='index',columns=['AHP权重'])

import statisticsresults = pd.concat([weights_ahp, weights_entropy], axis=1)
results.columns = ['AHP权重','熵权法权重']
results['最终权重'] = results.apply(lambda x: statistics.mean(x), axis=1)
results

	AHP权重	熵权法权重	最终权重
供货次数	0.445	0.091647	0.268324
平均供货量	0.232	0.392450	0.312225
合理供货比例	0.093	0.016205	0.054603
供货稳定性	0.093	0.000433	0.046716
供货连续性	0.093	0.002825	0.047912
单次最大供货量	0.044	0.496440	0.270220

d2['综合得分2'] = (df_norm['供货次数']*0.268324 + df_norm['平均供货量']*0.312225 + df_norm['合理供货比例']*0.054603 + df_norm['供货稳定性']*0.046716 + df_norm['供货连续性']*0.047912 + df_norm['单次最大供货量']*0.270220)*300
output = d2[['供应商ID','综合得分2']]# sort by scores
output = output.sort_values('综合得分2', ascending=False)
output.iloc[0:50,:].to_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_top50_AHP&Entropy.csv')

# 对排名前10的供应商可视化
df = output.iloc[0:10,:]
df = df.sort_values(by='综合得分2')
df

	供应商ID	综合得分2
307	S308	160.745089
329	S330	164.460643
107	S108	174.260128
360	S361	174.874852
373	S374	175.082953
228	S229	179.242563
139	S140	196.710758
150	S151	197.078972
200	S201	199.599719
347	S348	200.332317

# Horizontal lollipop plot
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np# Linux show Chinese characters *** important
plt.rcParams['font.family'] = 'WenQuanYi Micro Hei' 
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300my_range=range(1,11)
plt.hlines(y=my_range, xmin=0, xmax=df['综合得分2'], color='skyblue')
plt.plot(df['综合得分2'], my_range, "o")# Add titles and axis names
plt.yticks(my_range, df['供应商ID'])
plt.title("综合得分前10的供应商")
plt.xlabel('供应商综合得分')
plt.ylabel('供应商ID')# Show the plot
plt.show()

在这里插入图片描述

问题二

Pyhon中智能算法的模块：

https://scikit-opt.github.io/scikit-opt/#/en/README

Python中优化的模块

https://docs.scipy.org/doc/scipy/tutorial/optimize.html

formular editor:

https://editor.codecogs.com/

2.1 参考问题 1，该企业应至少选择多少家供应商供应原材料才可能满足生产的需求？

在这里插入图片描述

平均损失率
$\alpha$

loss_rate = sum(d3.iloc[:,1:].apply(lambda x: sum(x) / sum(x!=0), axis=0)) / 240
loss_rate

1.3063532832341274

供货量 $\widehat{x}_{i,t}$

supply_quantity_mean[:6]

0     1.960000
1     3.845070
2    68.785340
3     1.939394
4    64.598131
5     2.307692
dtype: float64

供应商得分 $s_{i}$

scores = pd.read_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_all.csv')
scores = scores.iloc[:,1:]

index_A = np.where(d2['材料分类'] == 'A')[0]
index_B = np.where(d2['材料分类'] == 'B')[0]
index_C = np.where(d2['材料分类'] == 'C')[0]

import numpy as npdef func(y):# input y is a list with 402 dims# set y as inty = np.around(np.array(y))res = sum(y) / sum(y * scores['综合得分'])return res

constraint_eq = [lambda y: sum((np.around(np.array(y))[index_A] * supply_quantity_mean[index_A] * (1-loss_rate/100)) / 0.6) + sum((np.around(np.array(y))[index_B] * supply_quantity_mean[index_B] * (1-loss_rate/100)) / 0.66) + sum((np.around(np.array(y))[index_C] * supply_quantity_mean[index_C] * (1-loss_rate/100)) / 0.72) - 2.82 * 10**4
]

遗传算法

from sko.GA import GA
y_len = 402ga = GA(func=func, n_dim=y_len, size_pop=50, max_iter=800, prob_mut=0.001, lb=[0]*y_len, ub=[1]*y_len, precision=1,constraint_eq=constraint_eq)
best_y_ga = ga.run()

suppliers_index_ga = np.where(best_y_ga[0] == 1)[0].tolist()
suppliers_ga = d2.iloc[suppliers_index_ga,0:2]
print('选择的供应商数量：', suppliers_ga.shape[0])

选择的供应商数量： 210

差异进化

from sko.DE import DE
y_len = 402
de = DE(func=func, n_dim=y_len, size_pop=50, max_iter=800, lb=[0]*y_len, ub=[1]*y_len,constraint_eq=constraint_eq)best_y_de = de.run()

y_de = np.around(best_y_de[0])
suppliers_index_de = np.where(y_de == 1)[0].tolist()
suppliers_de = d2.iloc[suppliers_index_de,0:2]
print('选择的供应商数量：', suppliers_de.shape[0])

选择的供应商数量： 184

粒子群优化（PSO）

from sko.PSO import PSO
y_len = 402
pso = PSO(func=func, n_dim=y_len, pop=40, max_iter=150, lb=[0]*y_len, ub=[1]*y_len, constraint_eq=constraint_eq)
best_y_pso = pso.run()

y_pso = np.around(best_y_pso[0])
suppliers_index_pso = np.where(y_pso == 1)[0].tolist()
suppliers_pso = d2.iloc[suppliers_index_pso,0:2]
print('选择的供应商数量：', suppliers_pso.shape[0])

选择的供应商数量： 152

import matplotlib.pyplot as plt
# Linux show Chinese characters *** important
plt.rcParams['font.family'] = 'WenQuanYi Micro Hei' 
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300plt.plot(pso.gbest_y_hist)
plt.title("目标函数的迭代过程")
plt.xlabel('迭代次数')
plt.ylabel('目标函数值')
plt.show()