FaceNet + Chinese Whispers (face clustering)

With a FaceNet model you can embed faces, but what if you want to cluster the faces without knowing how many people there are? This situation comes up when:
1 A surveillance stream yields a pile of faces and you want to group the similar ones
2 A face-recognition dataset needs to be cleaned
Most existing clustering methods, k-means being the typical example, need the number of classes up front. Is there an algorithm that discovers the number of classes by itself and clusters quickly?
There is: Chinese Whispers. It clusters on a graph in which each node is a face and each edge carries the similarity between the two faces it connects. On every iteration, a node sums the similarity weights of its neighbours per class and adopts the class with the largest sum, which both finds the classes and assigns faces to them. Clustering the MS-Celeb dataset with features from the FaceNet embedding, and with a suitable model and threshold, about 10 iterations were enough to reach a fairly good result.

The quality of the result depends mainly on the embedding model and on the chosen threshold; during the iterations, the similarity is used as the edge weight.
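As a quick illustration (not code from the repository), the snippet below sketches how an edge weight could be derived from two FaceNet embeddings; the random 128-dimensional vectors and the 0.75 cut-off are assumptions used only for this example:

import numpy as np

def edge_weight(emb_a, emb_b):
    # Inverse Euclidean distance: a larger value means more similar faces
    return 1.0 / np.linalg.norm(emb_a - emb_b)

# Two made-up, L2-normalised 128-d embeddings standing in for real FaceNet outputs
emb_a = np.random.rand(128); emb_a /= np.linalg.norm(emb_a)
emb_b = np.random.rand(128); emb_b /= np.linalg.norm(emb_b)

w = edge_weight(emb_a, emb_b)
threshold = 0.75
print(w, 'edge kept' if w > threshold else 'edge dropped')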

The core of the algorithm:

0 Build an undirected graph: every face is a node, and the similarity between two faces is the edge between their nodes. If the similarity between two faces is below the threshold mentioned above, no edge is created between the two corresponding nodes.

1 Before iterating, give every face its own id, which serves as the face's class; in other words, at initialization every face is its own class.

2 Start the first iteration: pick a node at random and, for all of that node's neighbours, do the following:

a On the first pass, every node still carries its own initial class, so the node simply takes the class of the neighbour whose edge weight is largest, which updates the node's class.

b From the second pass on, a node may have several neighbours that already belong to the same class. In that case the weights of those neighbours are summed per class, and the node takes the class with the largest summed weight.

3 Once every node has been processed, one iteration is complete. Repeat step 2 until the chosen number of iterations is reached. A minimal sketch of these steps follows.
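To make steps 0-3 concrete, here is a minimal, self-contained sketch of the iteration. It is not the repository code: the toy node ids, similarity values and the 0.75 threshold are assumptions for illustration, and it uses the networkx 2.x node API.

import random
import networkx as nx

def chinese_whispers(similarities, threshold=0.75, iterations=10):
    """similarities: dict mapping (i, j) node pairs to a similarity score."""
    G = nx.Graph()
    # Step 0/1: one node per face, each node starting in its own class
    nodes = {i for pair in similarities for i in pair}
    for i in nodes:
        G.add_node(i, cluster=i)
    # Edges exist only between faces whose similarity exceeds the threshold
    for (i, j), sim in similarities.items():
        if sim > threshold:
            G.add_edge(i, j, weight=sim)
    # Steps 2-3: each node repeatedly adopts the neighbour class with the largest summed weight
    for _ in range(iterations):
        order = list(G.nodes())
        random.shuffle(order)
        for node in order:
            votes = {}
            for nb in G[node]:
                c = G.nodes[nb]['cluster']
                votes[c] = votes.get(c, 0.0) + G[node][nb]['weight']
            if votes:
                G.nodes[node]['cluster'] = max(votes, key=votes.get)
    return {n: G.nodes[n]['cluster'] for n in G.nodes()}

# Toy example: faces 0-2 are mutually similar, faces 3-4 form a second group
sims = {(0, 1): 0.9, (0, 2): 0.85, (1, 2): 0.8, (3, 4): 0.95, (2, 3): 0.3}
print(chinese_whispers(sims))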

Code that applies Chinese Whispers clustering on FaceNet embeddings to dataset cleaning:

https://github.com/zhly0/facenet-face-cluster-chinese-whispers-

Usage recommendation:

https://github.com/davidsandberg/facenet/issues/370

""" Face Cluster """import tensorflow as tfimport numpy as npimport importlibimport argparseimport facenetimport osimport mathdef face_distance(face_encodings, face_to_compare):    """    Given a list of face encodings, compare them to a known face encoding and get a euclidean distance    for each comparison face. The distance tells you how similar the faces are.    :param faces: List of face encodings to compare    :param face_to_compare: A face encoding to compare against    :return: A numpy ndarray with the distance for each face in the same order as the 'faces' array    """    import numpy as np    if len(face_encodings) == 0:        return np.empty((0))    #return np.sum(face_encodings*face_to_compare,axis=1)    return 1/np.linalg.norm(face_encodings - face_to_compare, axis=1)def _chinese_whispers(encoding_list, threshold=0.75, iterations=20):    """ Chinese Whispers Algorithm    Modified from Alex Loveless' implementation,    http://alexloveless.co.uk/data/chinese-whispers-graph-clustering-in-python/    Inputs:        encoding_list: a list of facial encodings from face_recognition        threshold: facial match threshold,default 0.6        iterations: since chinese whispers is an iterative algorithm, number of times to iterate    Outputs:        sorted_clusters: a list of clusters, a cluster being a list of imagepaths,            sorted by largest cluster to smallest    """    #from face_recognition.api import _face_distance    from random import shuffle    import networkx as nx    # Create graph    nodes = []    edges = []    image_paths, encodings = zip(*encoding_list)    if len(encodings) <= 1:        print ("No enough encodings to cluster!")        return []    for idx, face_encoding_to_check in enumerate(encodings):        # Adding node of facial encoding        node_id = idx+1        # Initialize 'cluster' to unique value (cluster of itself)        node = (node_id, {'cluster': image_paths[idx], 'path': image_paths[idx]})        nodes.append(node)        # Facial encodings to compare        if (idx+1) >= len(encodings):            # Node is last element, don't create edge            break        compare_encodings = encodings[idx+1:]        distances = face_distance(compare_encodings, face_encoding_to_check)        encoding_edges = []        for i, distance in enumerate(distances):            if distance > threshold:                # Add edge if facial match                edge_id = idx+i+2                encoding_edges.append((node_id, edge_id, {'weight': distance}))        edges = edges + encoding_edges    G = nx.Graph()    G.add_nodes_from(nodes)    G.add_edges_from(edges)    # Iterate    for _ in range(0, iterations):        cluster_nodes = G.nodes()        shuffle(cluster_nodes)        for node in cluster_nodes:            neighbors = G[node]            clusters = {}            for ne in neighbors:                if isinstance(ne, int):                    if G.node[ne]['cluster'] in clusters:                        #该节点邻居节点的类别的权重                        #对应上面的字典cluster的意思就是                        #对应的某个路径下文件的权重                        clusters[G.node[ne]['cluster']] += G[node][ne]['weight']                    else:                        clusters[G.node[ne]['cluster']] = G[node][ne]['weight']            # find the class with the highest edge weight sum            edge_weight_sum = 0            max_cluster = 0            #将邻居节点的权重最大值对应的文件路径给到当前节点            for cluster in clusters:                if clusters[cluster] > edge_weight_sum:                    edge_weight_sum = 
clusters[cluster]                    max_cluster = cluster            # set the class of target node to the winning local class            G.node[node]['cluster'] = max_cluster    clusters = {}    # Prepare cluster output    for (_, data) in G.node.items():        cluster = data['cluster']        path = data['path']        if cluster:            if cluster not in clusters:                clusters[cluster] = []            clusters[cluster].append(path)    # Sort cluster output    sorted_clusters = sorted(clusters.values(), key=len, reverse=True)    return sorted_clustersdef cluster_facial_encodings(facial_encodings):    """ Cluster facial encodings        Intended to be an optional switch for different clustering algorithms, as of right now        only chinese whispers is available.        Input:            facial_encodings: (image_path, facial_encoding) dictionary of facial encodings        Output:            sorted_clusters: a list of clusters, a cluster being a list of imagepaths,                sorted by largest cluster to smallest    """    if len(facial_encodings) <= 1:        print ("Number of facial encodings must be greater than one, can't cluster")        return []    # Only use the chinese whispers algorithm for now    sorted_clusters = _chinese_whispers(facial_encodings.items())    return sorted_clustersdef compute_facial_encodings(sess,images_placeholder,embeddings,phase_train_placeholder,image_size,                embedding_size,nrof_images,nrof_batches,emb_array,batch_size,paths):    """ Compute Facial Encodings        Given a set of images, compute the facial encodings of each face detected in the images and        return them. If no faces, or more than one face found, return nothing for that image.        Inputs:            image_paths: a list of image paths        Outputs:            facial_encodings: (image_path, facial_encoding) dictionary of facial encodings    """    for i in range(nrof_batches):        start_index = i*batch_size        end_index = min((i+1)*batch_size, nrof_images)        paths_batch = paths[start_index:end_index]        images = facenet.load_data(paths_batch, False, False, image_size)        feed_dict = { images_placeholder:images, phase_train_placeholder:False }        emb_array[start_index:end_index,:] = sess.run(embeddings, feed_dict=feed_dict)    facial_encodings = {}    for x in range(nrof_images):        facial_encodings[paths[x]] = emb_array[x,:]    return facial_encodingsdef main(args):    """ Main    Given a list of images, save out facial encoding data files and copy    images into folders of face clusters.    
"""    from os.path import join, basename, exists    from os import makedirs    import numpy as np    import shutil    import sys    if not exists(args.output):        makedirs(args.output)    with tf.Graph().as_default():        with tf.Session() as sess:            train_set = facenet.get_dataset(args.input)            #image_list, label_list = facenet.get_image_paths_and_labels(train_set)            meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.model_dir))                        print('Metagraph file: %s' % meta_file)            print('Checkpoint file: %s' % ckpt_file)            facenet.load_model(args.model_dir, meta_file, ckpt_file)                        # Get input and output tensors            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")                        image_size = images_placeholder.get_shape()[1]            embedding_size = embeddings.get_shape()[1]                    # Run forward pass to calculate embeddings            print('Runnning forward pass on images')            #counter  = 0            for x in range(len(train_set)):                  #counter += 1                #if counter<56700:                #continue                if counter % 100 == 0:                    print(counter)                image_paths = train_set[x].image_paths                nrof_images = len(image_paths)                nrof_batches = int(math.ceil(1.0*nrof_images / args.batch_size))                emb_array = np.zeros((nrof_images, embedding_size))                facial_encodings = compute_facial_encodings(sess,images_placeholder,embeddings,phase_train_placeholder,image_size,                embedding_size,nrof_images,nrof_batches,emb_array,args.batch_size,image_paths)                sorted_clusters = cluster_facial_encodings(facial_encodings)                num_cluster = len(sorted_clusters)                #print('created %d cluster!',num_cluster)                #for idx,cluster in enumerate(sorted_clusters):                #    print('%d th cluster num :%d',idx,len(cluster))                dest_dir = join(args.output, train_set[x].name)                # Copy image files to cluster folders                for idx, cluster in enumerate(sorted_clusters):                    #这个是保存聚类后所有类别                    #cluster_dir = join(dest_dir, str(idx))                    #只保存聚类后个数最多的                    cluster_dir = dest_dir                    if len(cluster)<5:                    break                    if not exists(cluster_dir):                        makedirs(cluster_dir)                    for path in cluster:                        shutil.copy(path, join(cluster_dir, basename(path)))                    break    def parse_args():    """Parse input arguments."""    import argparse    parser = argparse.ArgumentParser(description='Get a shape mesh (t-pose)')    parser.add_argument('--model_dir', type=str, help='model dir', required=True)    parser.add_argument('--batch_size', type=int, help='model dir', required=30)    parser.add_argument('--input', type=str, help='Input dir of images', required=True)    parser.add_argument('--output', type=str, help='Output dir of clusters', required=True)    args = parser.parse_args()    return argsif __name__ == '__main__':    """ Entry point """    main(parse_args())


Related Chinese Whispers code:

http://alexloveless.co.uk/data/chinese-whispers-graph-clustering-in-python/

If you find it useful, please give the repository a star on GitHub.
There is another approach to dataset cleaning:
For each identity folder, find a frontal face, compare every other face against it, and drop the faces whose score falls below a threshold. This presupposes that the facial landmarks let you pick a frontal face that really belongs to the correct person in that folder; when the folder contains frontal faces of several different people, the approach may fail. Other ideas can be developed on top of this one; a sketch of the filtering step is given below.
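A minimal sketch of this cleaning idea, under stated assumptions: the function name clean_folder and the 1.1 Euclidean distance cut-off are illustrative placeholders, not from the post or the repository, and choosing the frontal reference face from landmarks is left out.

import numpy as np

def clean_folder(encodings, reference_embedding, max_distance=1.1):
    """encodings: {image_path: embedding} for one identity folder;
    reference_embedding: embedding of the chosen frontal face."""
    kept, dropped = [], []
    for path, emb in encodings.items():
        # Keep an image only if its embedding is close enough to the frontal reference
        dist = np.linalg.norm(emb - reference_embedding)
        (kept if dist <= max_distance else dropped).append(path)
    return kept, dropped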