MATLAB代码：机器学习-分类器

本文给出三种机器学习分类器的 MATLAB 实现代码块：支持向量机、决策树、逻辑回归。

SVM/支持向量机(Support Vector Machine)

原理

MATLAB实现

实例代码块

采用搜索确定参数

Decision Tree / 决策树

原理

MATLAB实现

实例代码块

Logistic Regression / 逻辑回归

原理

MATLAB实现

实例代码块

SVM/支持向量机(Support Vector Machine)

原理

详细原理可参考：机器学习（九）：支持向量机SVM(超详细理论基础）_支持向量机的分类模型-CSDN博客

MATLAB实现

实例代码块

1- 导入数据

%% Train/test split
% Fraction of the samples assigned to the training set (adjust as needed).
train_ratio = 0.8;

%% Load data
% features.mat is expected to provide both `features` and `labels`, since
% both are indexed by row below -- TODO confirm against the data file.
load('features.mat');

% Shuffle the row indices; the first train_num shuffled rows form the
% training set and the remaining rows form the test set.
n = randperm(size(features,1));
train_num = floor(train_ratio * numel(n));

train_features = features(n(1:train_num), :);
test_features  = features(n(train_num + 1:end), :);
train_labels   = labels(n(1:train_num), :);
test_labels    = labels(n(train_num + 1:end), :);

2- 数据处理与设置

%% Hyperparameters
% NOTE: in the original text the assignment of c and the first mapminmax
% call were fused into comment lines, so they never executed. They are
% restored here as real statements.
c = 20;   % passed as 'BoxConstraint' to templateSVM; tune as needed
g = 1.5;  % passed as 'KernelScale' to templateSVM; tune as needed
acc = 0;  % accuracy initialisation (not read by the visible snippets)

%% Normalisation
% mapminmax works row-wise, so the sample-by-feature matrix is transposed
% to put one feature per row, scaled, and transposed back.
[Train_features,PS] = mapminmax(train_features');
Train_features = Train_features';

% Re-use the training-set scaling (PS) for the test set so that no
% test-set statistics leak into the preprocessing.
Test_features = mapminmax('apply',test_features',PS);
Test_features = Test_features';

3- SVM核心代码

%% Train the multiclass SVM
% RBF-kernel binary learner configured with the c / g values set earlier.
template = templateSVM( ...
    'KernelFunction', 'rbf', ...
    'BoxConstraint',  c, ...
    'KernelScale',    g);

% The wait bar is only a "busy" indicator: it is opened at 0 and closed
% once fitting has finished.
h_model = waitbar(0, '正在训练多类别SVM模型...');

% One-vs-one error-correcting output code model built from the template.
model = fitcecoc(Train_features, train_labels, ...
    'Learners', template, ...
    'Coding',   'onevsone', ...
    'Verbose',  0);

close(h_model);

% Persist the trained model to disk.
save('my_trained_model.mat', 'model');

4- 后处理

%% Post-processing: accuracy and confusion matrices
% NOTE: in the original text both predict calls and both confusionmat
% calls were fused into comment lines, so they never executed. They are
% restored here as separate statements.

% Predict on the training set and compute the training accuracy.
% strcmp is used for the comparison, so labels are presumably cell arrays
% of character vectors (or strings) -- TODO confirm against features.mat.
[predict_label_train,~] = predict(model,Train_features);
train_accuracy = sum(strcmp(predict_label_train,train_labels)) / length(train_labels);

% Predict on the test set and compute the test accuracy.
[predict_label_test,~] = predict(model,Test_features);
test_accuracy = sum(strcmp(predict_label_test,test_labels)) / length(test_labels);

% Training-set confusion matrix, plotted and saved as a PNG.
confusion_matrix_train = confusionmat(train_labels,predict_label_train);
figure;
confusionchart(confusion_matrix_train);
title('训练集混淆矩阵');
saveas(gcf, 'confusion_matrix_train.png');

% Test-set confusion matrix, plotted and saved as a PNG.
confusion_matrix_test = confusionmat(test_labels,predict_label_test);
figure;
confusionchart(confusion_matrix_test);
title('测试集混淆矩阵');
saveas(gcf, 'confusion_matrix_test.png');

% Report both accuracies.
disp(['训练集准确度: ',num2str(train_accuracy)]);
disp(['测试集准确度: ',num2str(test_accuracy)]);

采用搜索确定参数

可以采用随机搜索＋网格搜索的方式确定较优的参数值：先用随机搜索在较大的范围内（以 2 的幂为尺度）粗略定位较优的 c、g，再以该结果为中心做网格搜索进行细化。

%% Hyperparameter search: coarse random search, then a local grid search
% Adjust the initial values and the search ranges as needed.
%
% Fixes relative to the original text:
%   * loop headers and bodies were fused onto single lines and did not parse;
%   * v (number of CV folds) was used by the random search before it was
%     defined;
%   * g_vec was centred on c_center instead of g_center;
%   * the tolerance was named eps, shadowing the built-in eps;
%   * the mesh grids were stored in c / g, overwriting the scalar
%     hyperparameters of the same name; they are now c_grid / g_grid.
% To train with the values found here, assign c = bestc; g = bestg;
% before running the training snippet.

v = 5;            % number of cross-validation folds (used by both stages)
tol = 10^(-4);    % accuracies closer than this are treated as a tie

num_random_search = 30;
bestc_random = 15;
bestg_random = 2;
bestacc_random = 0;
bestc = 15;
bestg = 0.7;
bestacc = 0;

%% Stage 1: random search
h_random = waitbar(0, '正在进行随机搜索寻找较优参数...');
for k = 1:num_random_search
    % Sample exponents uniformly: log2(c) in [2, 6], log2(g) in [-3, 2].
    c_random = 2 + (6 - (2)) * rand();
    g_random = -3 + (2 - (-3)) * rand();

    template_random = templateSVM('KernelFunction','rbf', ...
        'BoxConstraint',2^c_random,'KernelScale',2^g_random);
    classifier_random = fitcecoc(Train_features,train_labels, ...
        'Learners',template_random,'Coding','onevsone','Verbose',0, ...
        'CrossVal','on','KFold',v);

    % kfoldLoss is the cross-validated loss; 1 - loss is used as accuracy.
    cg_random = kfoldLoss(classifier_random);
    if (1 - cg_random) > bestacc_random
        bestacc_random = 1 - cg_random;
        bestc_random = 2^c_random;
        bestg_random = 2^g_random;
    end

    waitbar(k / num_random_search, h_random, ...
        sprintf('随机搜索已完成 %.2f%%', k / num_random_search * 100));
end
close(h_random);
fprintf('Best c: %f\n', bestc_random);
fprintf('Best g: %f\n', bestg_random);

%% Stage 2: grid search around the random-search optimum (log2 scale)
c_center = log2(bestc_random);
g_center = log2(bestg_random);
c_range = 5;
g_range = 0.3;
c_vec = linspace(c_center - c_range,c_center + c_range,10);
g_vec = linspace(g_center - g_range,g_center + g_range,10);
[c_grid,g_grid] = meshgrid(c_vec,g_vec);
[m,n] = size(c_grid);
cg = zeros(m,n);   % cross-validated loss for every grid point

h_grid = waitbar(0, '正在通过网格搜索寻找最佳c/g参数...');
total_iterations_grid = m * n;
current_iteration_grid = 0;
for i = 1:m
    for j = 1:n
        template = templateSVM('KernelFunction','rbf', ...
            'BoxConstraint',2^c_grid(i,j),'KernelScale',2^g_grid(i,j));
        classifier = fitcecoc(Train_features,train_labels, ...
            'Learners',template,'Coding','onevsone','Verbose',0, ...
            'CrossVal','on','KFold',v);
        cg(i,j) = kfoldLoss(classifier);

        % Strictly better accuracy: take this grid point.
        if (1 - cg(i,j)) > bestacc
            bestacc = 1 - cg(i,j);
            bestc = 2^c_grid(i,j);
            bestg = 2^g_grid(i,j);
        end

        % Tie (within tol): prefer the smaller box constraint.
        if abs((1 - cg(i,j)) - bestacc) <= tol && bestc > 2^c_grid(i,j)
            bestacc = 1 - cg(i,j);
            bestc = 2^c_grid(i,j);
            bestg = 2^g_grid(i,j);
        end

        current_iteration_grid = current_iteration_grid + 1;
        waitbar(current_iteration_grid / total_iterations_grid, h_grid, ...
            sprintf('网格搜索已完成 %.2f%%', current_iteration_grid / total_iterations_grid * 100));
    end
end
close(h_grid);

% Report the refined result of the grid search.
fprintf('Grid search best c: %f\n', bestc);
fprintf('Grid search best g: %f\n', bestg);
fprintf('Grid search best CV accuracy: %f\n', bestacc);

Decision Tree / 决策树

原理

详细原理可参考：

决策树(Decision Tree)-CSDN博客

MATLAB实现

实例代码块

1-数据导入与归一化

%% Settings
train_ratio = 0.8;   % fraction of samples used for training
v = 5;               % number of cross-validation folds

%% Load data
% features.mat is expected to provide `features` and `labels` (both are
% indexed by row below) -- TODO confirm against the data file.
load('features.mat');

% Random row order; the first train_num shuffled rows are the training set.
n = randperm(size(features,1));
train_num = floor(train_ratio * numel(n));

% Training set
train_features = features(n(1:train_num), :);
train_labels   = labels(n(1:train_num), :);

% Test set
test_features = features(n(train_num + 1:end), :);
test_labels   = labels(n(train_num + 1:end), :);

%% Normalisation
% mapminmax is applied to the transposed matrices (one feature per row).
% The settings PS fitted on the training data are re-applied to the test
% data, and each result is transposed back.
[Train_features, PS] = mapminmax(train_features');
Train_features = Train_features';
Test_features = mapminmax('apply', test_features', PS)';

2-参数网格搜索

%% Grid search over decision-tree settings
% Searches the maximum number of splits, the minimum leaf size and the
% split criterion, scoring each combination by v-fold cross-validation.
%
% Fixes relative to the original text:
%   * the three nested for-loops and their end keywords were fused onto a
%     single line and did not parse;
%   * the wait bar was created but never advanced.
%
% NOTE: the "depth" variables are passed to templateTree as
% 'MaxNumSplits', i.e. they bound the number of splits, not the tree depth.

% Fallback values, kept if no combination scores above 0.
best_depth = 180;
best_minLeaf = 3;
best_splitCriterion = 'gdi';
best_accuracy = 0;

% Candidate values.
depth_vec = 50:10:150;
minLeaf_vec = 1:10;
splitCriteria = {'gdi', 'deviance'};

h_search = waitbar(0, '正在通过搜索寻找最佳决策树参数...');
total_iterations = numel(depth_vec) * numel(minLeaf_vec) * numel(splitCriteria);
current_iteration = 0;

for d = depth_vec
    for m = minLeaf_vec
        % Looping over a cell row yields 1x1 cells, hence sc{1} below.
        for sc = splitCriteria
            template = templateTree('MaxNumSplits', d, 'MinLeafSize', m, 'SplitCriterion', sc{1});
            classifier = fitcecoc(Train_features, train_labels, ...
                'Learners', template, 'Coding', 'onevsone', 'Verbose', 0, ...
                'CrossVal', 'on', 'KFold', v);

            % Cross-validated loss; 1 - loss is used as the accuracy.
            cg = kfoldLoss(classifier);
            acc = 1 - cg;

            if acc > best_accuracy
                best_accuracy = acc;
                best_depth = d;
                best_minLeaf = m;
                best_splitCriterion = sc{1};
            end

            current_iteration = current_iteration + 1;
            waitbar(current_iteration / total_iterations, h_search);
        end
    end
end
close(h_search);

fprintf('最佳深度: %d\n', best_depth);
fprintf('最佳最小叶子节点样本数: %d\n', best_minLeaf);
fprintf('最佳分裂标准: %s\n', best_splitCriterion);

3-核心代码

%% Final model
% Tree learner configured with the best settings found by the search.
template = templateTree( ...
    'MaxNumSplits',   best_depth, ...
    'MinLeafSize',    best_minLeaf, ...
    'SplitCriterion', best_splitCriterion);

% One-vs-one ECOC model trained on the full (normalised) training set.
model = fitcecoc(Train_features, train_labels, ...
    'Learners', template, ...
    'Coding',   'onevsone', ...
    'Verbose',  0);

% Persist the trained model to disk.
save('my_trained_model_tree.mat', 'model');

4-后处理

%% Post-processing: accuracy and confusion matrices
% NOTE: in the original text the test-set predict call and both
% confusionmat calls were fused into comment lines, so they never
% executed. They are restored here as separate statements.

% Predict on the training set and compute the training accuracy.
% strcmp is used for the comparison, so labels are presumably cell arrays
% of character vectors (or strings) -- TODO confirm against features.mat.
[predict_label_train, ~] = predict(model, Train_features);
train_accuracy = sum(strcmp(predict_label_train, train_labels)) / length(train_labels);

% Predict on the test set and compute the test accuracy.
[predict_label_test, ~] = predict(model, Test_features);
test_accuracy = sum(strcmp(predict_label_test, test_labels)) / length(test_labels);

% Training-set confusion matrix, plotted and saved as a PNG.
confusion_matrix_train = confusionmat(train_labels, predict_label_train);
figure;
confusionchart(confusion_matrix_train);
title('训练集混淆矩阵');
saveas(gcf, 'confusion_matrix_tree2train.png');

% Test-set confusion matrix, plotted and saved as a PNG.
confusion_matrix_test = confusionmat(test_labels, predict_label_test);
figure;
confusionchart(confusion_matrix_test);
title('测试集混淆矩阵');
saveas(gcf, 'confusion_matrix_tree2test.png');

% Report both accuracies.
disp(['训练集准确度: ', num2str(train_accuracy)]);
disp(['测试集准确度: ', num2str(test_accuracy)]);

Logistic Regression / 逻辑回归

原理

详细原理可参考：

逻辑回归（Logistic Regression）-CSDN博客

MATLAB实现

实例代码块

1-数据导入与归一化

%% Settings
train_ratio = 0.85;  % fraction of samples used for training
v = 5;               % number of cross-validation folds

%% Load data
% features.mat is expected to provide `features` and `labels` (both are
% indexed by row below) -- TODO confirm against the data file.
load('features.mat');

% Shuffle the rows, then cut the shuffled order into train / test parts.
n = randperm(size(features, 1));
train_num = floor(train_ratio * numel(n));

train_features = features(n(1:train_num), :);
test_features  = features(n(train_num + 1:end), :);
train_labels   = labels(n(1:train_num), :);
test_labels    = labels(n(train_num + 1:end), :);

%% Normalisation
% Fit the scaling on the training data only (one feature per row after the
% transpose), then apply the same settings PS to the test data.
[Train_features, PS] = mapminmax(train_features');
Train_features = Train_features';
Test_features = mapminmax('apply', test_features', PS)';

2-参数网格搜索

%% Grid search over the ridge regularisation strength Lambda
% Each candidate Lambda is scored by manual v-fold cross-validation on the
% training set.
%
% Fixes relative to the original text:
%   * lambda_range was fused into a comment and therefore undefined;
%   * loop headers, bodies and end keywords were fused onto one line;
%   * the fold accuracy was divided by length(test_idx). test_idx is a
%     logical mask over ALL training samples, so that is the full sample
%     count; the divisor is now the number of held-out samples;
%   * the partition was regenerated for every Lambda; it is now created
%     once so that every candidate is scored on the same folds.

lambda_range = logspace(-5, 5, 50);
best_lambda = 1e-7;   % fallback, kept if no candidate scores above 0
best_accuracy = 0;

% Stratified v-fold partition of the training labels, shared by all lambdas.
cv = cvpartition(train_labels, 'KFold', v);

h_grid = waitbar(0, '正在进行网格搜索调整 Lambda 参数...');
for i = 1:length(lambda_range)
    lambda = lambda_range(i);
    template = templateLinear('Learner', 'logistic', 'Regularization', 'ridge', 'Lambda', lambda);

    cv_accuracy = zeros(cv.NumTestSets, 1);
    for j = 1:cv.NumTestSets
        % Logical masks selecting the fit / held-out rows of this fold.
        train_idx = cv.training(j);
        test_idx = cv.test(j);

        model = fitcecoc(Train_features(train_idx, :), train_labels(train_idx), ...
            'Learners', template, 'Coding', 'onevsone', 'Verbose', 0);
        predict_label = predict(model, Train_features(test_idx, :));

        % Fraction of held-out samples classified correctly.
        cv_accuracy(j) = sum(strcmp(predict_label, train_labels(test_idx))) / nnz(test_idx);
    end

    mean_accuracy = mean(cv_accuracy);
    if mean_accuracy > best_accuracy
        best_accuracy = mean_accuracy;
        best_lambda = lambda;
    end

    waitbar(i / length(lambda_range), h_grid, ...
        sprintf('Lambda 参数调整中... %.2f%%', i / length(lambda_range) * 100));
end
close(h_grid);
fprintf('最佳 Lambda: %f\n', best_lambda);

3-核心代码

%% Final model
% Ridge-regularised logistic learner using the Lambda selected earlier.
template = templateLinear( ...
    'Learner',        'logistic', ...
    'Regularization', 'ridge', ...
    'Lambda',         best_lambda);

% The wait bar is only a "busy" indicator: it is opened at 0 and closed
% once fitting has finished.
h_model = waitbar(0, '正在训练 Logistic 回归模型...');

% One-vs-one ECOC model trained on the full (normalised) training set.
model = fitcecoc(Train_features, train_labels, ...
    'Learners', template, ...
    'Coding',   'onevsone', ...
    'Verbose',  0);

close(h_model);

% Persist the trained model to disk.
save('my_trained_model_logistic.mat', 'model');

4-后处理

%% Post-processing: accuracy and confusion matrices
% NOTE: in the original text the test-set predict call and both
% confusionmat calls were fused into comment lines, so they never
% executed. They are restored here as separate statements.

% Predict on the training set and compute the training accuracy.
% strcmp is used for the comparison, so labels are presumably cell arrays
% of character vectors (or strings) -- TODO confirm against features.mat.
[predict_label_train, ~] = predict(model, Train_features);
train_accuracy = sum(strcmp(predict_label_train, train_labels)) / length(train_labels);

% Predict on the test set and compute the test accuracy.
[predict_label_test, ~] = predict(model, Test_features);
test_accuracy = sum(strcmp(predict_label_test, test_labels)) / length(test_labels);

% Training-set confusion matrix, plotted and saved as a PNG.
confusion_matrix_train = confusionmat(train_labels, predict_label_train);
figure;
confusionchart(confusion_matrix_train);
title('训练集混淆矩阵');
saveas(gcf, 'confusion_matrix_log2_train.png');

% Test-set confusion matrix, plotted and saved as a PNG.
confusion_matrix_test = confusionmat(test_labels, predict_label_test);
figure;
confusionchart(confusion_matrix_test);
title('测试集混淆矩阵');
saveas(gcf, 'confusion_matrix_log2_test.png');

% Report both accuracies.
disp(['训练集准确度: ', num2str(train_accuracy)]);
disp(['测试集准确度: ', num2str(test_accuracy)]);