z-score算法原理参考网址
https://blog.csdn.net/m0_59596937/article/details/128378641
具体实现代码如下:
import numpy as np


class ZScoreOutlierDetector:
    """Flag 2-D points whose per-axis Z-score exceeds a threshold.

    The detector learns the mean and standard deviation of the x and y
    coordinates from a sample (``fit``) and then reports a point as an
    outlier when the absolute Z-score of *either* coordinate is strictly
    greater than ``threshold`` (``is_outlier``).
    """

    def __init__(self, threshold: float = 3):
        """Create an unfitted detector.

        Args:
            threshold: Absolute Z-score above which a coordinate is treated
                as outlying. Defaults to 3.
        """
        self.threshold = threshold
        # Per-axis statistics; they stay None until fit() has been called.
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None

    def fit(self, points) -> "ZScoreOutlierDetector":
        """Compute the mean and standard deviation of the x and y columns.

        Args:
            points: Array-like with one row per sample. Column 0 is read as
                x and column 1 as y; any further columns are ignored.

        Returns:
            The detector itself, so calls can be chained
            (``ZScoreOutlierDetector().fit(sample).is_outlier(p)``).

        Raises:
            ValueError: If ``points`` is not two-dimensional, has fewer than
                two columns, or contains no rows.
        """
        data = np.asarray(points)
        if data.ndim != 2 or data.shape[1] < 2:
            raise ValueError(
                "points must be a 2-D array-like with at least two columns "
                f"(x, y); got shape {data.shape}"
            )
        if data.shape[0] == 0:
            # Statistics of an empty sample would be NaN and every later
            # comparison against the threshold would silently be False.
            raise ValueError("points must contain at least one sample")

        x = data[:, 0]  # all rows, column 0
        y = data[:, 1]  # all rows, column 1
        self.x_mean = np.mean(x)
        self.x_std = np.std(x)
        self.y_mean = np.mean(y)
        self.y_std = np.std(y)
        return self

    def _calc_z_score(self, val, mean, std):
        """Return the Z-score of ``val`` for one axis.

        When ``std`` is zero (all fitted samples were identical on this
        axis) the ordinary formula would divide by zero, so a value equal
        to the mean scores 0 and any other value scores infinity.
        """
        if std == 0:
            return float("inf") if val != mean else 0.0
        return (val - mean) / std

    def is_outlier(self, point) -> bool:
        """Tell whether ``point`` is an outlier relative to the fitted sample.

        Args:
            point: Indexable whose items 0 and 1 are the x and y coordinates.

        Returns:
            True if the absolute Z-score of either coordinate is strictly
            greater than ``self.threshold``; False otherwise.

        Raises:
            RuntimeError: If the detector has not been fitted yet.
        """
        fitted_stats = (self.x_mean, self.x_std, self.y_mean, self.y_std)
        if any(stat is None for stat in fitted_stats):
            raise RuntimeError(
                "ZScoreOutlierDetector is not fitted; call fit() before "
                "is_outlier()"
            )

        x_z = self._calc_z_score(point[0], self.x_mean, self.x_std)
        y_z = self._calc_z_score(point[1], self.y_mean, self.y_std)
        # Exceeding the threshold on a single axis is enough. bool() turns
        # the numpy boolean produced by the comparison into a plain bool.
        return bool(abs(x_z) > self.threshold or abs(y_z) > self.threshold)


# Example usage
if __name__ == "__main__":
    # Reference sample from which the per-axis mean and standard deviation
    # are estimated.
    reference_points = [
        (1.0, 2.0),
        (1.5, 3.0),
        (1.2, 2.5),
        (1.8, 3.2),
        (0.9, 2.1),
    ]

    # Build the detector with a Z-score threshold of 3 and fit it.
    zscore_detector = ZScoreOutlierDetector(threshold=3)
    zscore_detector.fit(reference_points)

    # Classify a single candidate point and report the verdict.
    test_point = (5.0, 4.0)
    result = zscore_detector.is_outlier(test_point)
    print(f"检测点 {test_point} 是否为离群点:{result}")