2017.04.10:Python数据可视化01

来源:互联网 发布:有数据分析软件吗 编辑:程序博客网 时间:2024/04/20 22:01

def is_outlier(points, threshold=3.5):
    """Flag outliers using the median-based modified z-score.

    Each observation's Euclidean distance from the per-feature median is
    scaled by the median of those distances (the MAD) and compared against
    ``threshold``.

    Args:
        points: Array-like of observations, either 1-D (``n`` values) or
            2-D (``n`` observations by ``d`` features).
        threshold: Modified z-score above which an observation is
            classified as an outlier.

    Returns:
        Boolean NumPy array with one entry per observation: ``True`` where
        the observation is an outlier, ``False`` otherwise.
    """
    # Accept lists and tuples as well as ndarrays.
    points = np.asarray(points)

    # Treat a 1-D input as n observations of a single feature.
    if points.ndim == 1:
        points = points[:, None]

    # No observations: nothing to score, and np.median of an empty array
    # would only yield NaN plus a warning.
    if points.shape[0] == 0:
        return np.zeros(0, dtype=bool)

    # axis=0 takes the median down each column (one value per feature).
    median = np.median(points, axis=0)

    # Euclidean distance of every observation from the median point.
    diff = np.sqrt(np.sum((points - median) ** 2, axis=-1))

    # Median absolute deviation (MAD) of those distances.
    med_abs_deviation = np.median(diff)

    if med_abs_deviation == 0:
        # At least half of the observations coincide with the median, so
        # the score would be 0/0 or x/0. Flag exactly the observations that
        # deviate at all -- the limit of the formula -- without dividing by
        # zero.
        return diff > 0

    # Modified z-score, see
    # http://www.itl.nist.gov/div898/handbook/eda/section4/eda43.htm#Iglewicz
    modified_z_score = 0.6745 * diff / med_abs_deviation

    # Mask that is True for every outlier.
    return modified_z_score > threshold


0 0