首先计算出距离每个点e以内的点,如果个数>=minPts,则找出它的直接密度可达和间接可达的点,用visited标记点是否已经在簇中,循环直到最后一个点。
#include <fstream>
#include <vector>
#include <iostream>
#include <cmath>
#include <set>
#include <sstream>
using namespace std;struct Point
{double x;double y;
};double distance(const Point& a, const Point& b)
{return sqrt(pow(a.x - b.x,2) + pow(a.y - b.y,2));
}vector <set<int>> getPointSet(const vector<Point> data, double e)
{vector<set<int>> PointSet(data.size()); //用到了PointSet[i],要先进行初始化//vector<set<int>> PointSet;for(int i=0;i<data.size();i++){for (int j = 0; j < data.size(); j++){if (distance(data[i], data[j]) <= e){PointSet[i].insert(j); //j还是j+1}}}return PointSet;
}vector<set<int>> DAIANA(vector<set<int>> point,int MinPts)
{vector<bool> visted(point.size(),false);vector<set<int>> cluster;for (int i = 0; i < point.size(); i++){if (!visted[i]){if (point[i].size() >= MinPts) //{几个} {for (set<int>::const_iterator j = point[i].begin(); j != point[i].end(); j++){visted[*j] = true;if (point[*j].size() >= MinPts) //间接相连{for (auto k = point[*j].begin(); k != point[*j].end(); k++){visted[*k] = true;point[i].insert(*k);}//point[i].insert(point[*j].begin(), point[*j].end());}}cluster.push_back(point[i]);}}}return cluster;
}vector<Point> ReadData(string filename)
{vector<Point> data;ifstream file(filename);if (file.is_open()){string line;while (getline(file, line)){istringstream iss(line);double x, y;string token;Point point;if (getline(iss, token, ',') && istringstream(token) >> point.x &&getline(iss, token, ',') && istringstream(token) >> point.y) {data.push_back(point);}}}else{cout << "open fail";}file.close();return data;
}int main()
{ vector<Point> dataset = ReadData("data.txt");double e;int MinPts;cin >> e >> MinPts;vector <set<int>> point;vector<set<int>> cluster;point=getPointSet(dataset, e);cluster=DAIANA(point, MinPts);for (int i = 0; i < cluster.size(); i++){cout << "{";int count = 0;for (auto j = cluster[i].begin(); j != cluster[i].end(); j++){cout << *j+1 ;if (++count < cluster[i].size()){cout << ",";}}cout << "}" << endl;}
}
或
判断是否是核心点,如果是,将它的直接密度可达点放进set<int> neighbors;再判断这些直接密度可达点里如果有核心点则又将该直接密度可达点放进set<int> subNeighbors,最后neighbors.insert(subNeighbors.begin(), subNeighbors.end());
std::vector<std::vector<Point>> dbscan(const std::vector<Point>& dataset, double epsilon, int minPts) {std::vector<std::vector<Point>> clusters;std::vector<bool> visited(dataset.size(), false);for (int i = 0; i < dataset.size(); ++i) {if (visited[i]) continue;visited[i] = true;std::vector<Point> cluster;std::set<int> neighbors;for (int j = 0; j < dataset.size(); ++j) {if (calculateDistance(dataset[i], dataset[j]) <= epsilon) {neighbors.insert(j);}}if (neighbors.size() >= minPts) {for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {int index = *it;if (!visited[index]) {visited[index] = true;std::set<int> subNeighbors;for (int j = 0; j < dataset.size(); ++j) {if (calculateDistance(dataset[index], dataset[j]) <= epsilon) {subNeighbors.insert(j);}}if (subNeighbors.size() >= minPts) {neighbors.insert(subNeighbors.begin(), subNeighbors.end());}}}}for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {cluster.push_back(dataset[*it]);}if (!cluster.empty()) {clusters.push_back(cluster);}}return clusters;
}
数据集:参考数据挖掘原理与算法第四版DBSCAN例子
1.0, 0.0
4.0, 0.0
0.0, 1.0
1.0, 1.0
2.0, 1.0
3.0,1.0
4.0,1.0
5.0,1.0
0.0,2.0
1.0,2.0
4.0,2.0
1.0,3.0
结果