
来源:互联网 发布:seo 网站收录批量提交 编辑:程序博客网 时间:2024/05/19 00:13


/*
 * Micro-benchmark comparing three ways of incrementing a shared counter from
 * one thread per online CPU:
 *
 *   phase 0: __sync_fetch_and_add (GCC atomic builtin)   -> global_int0
 *   phase 1: plain, unsynchronised "++"                  -> global_int1
 *   phase 2: "++" protected by a pthread mutex           -> global_int2
 *
 * Each thread pins itself to one CPU, performs INC_TO increments and reports
 * the elapsed TSC cycles and wall-clock microseconds on stderr.  main() then
 * prints how close each counter got to the expected total.
 *
 * Linux / x86 only: uses the rdtsc instruction, sched_setaffinity and the
 * gettid system call.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* needed for cpu_set_t, CPU_ZERO/CPU_SET, sched_setaffinity */
#endif

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/syscall.h>
#include <sys/time.h>

#include <linux/types.h>
#include <linux/unistd.h>

#define INC_TO 1000000 // one million...

/*
 * Read the CPU time-stamp counter.
 * Returns the 64-bit value assembled from EDX:EAX after "rdtsc".
 */
__u64 rdtsc(void)
{
    __u32 lo, hi;

    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    return (__u64)hi << 32 | lo;
}

int global_int0 = 0; /* incremented with __sync_fetch_and_add */
int global_int1 = 0; /* incremented with no synchronisation at all */
int global_int2 = 0; /* incremented under count_lock */

pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Return the kernel thread id of the calling thread via the raw system call.
 * NOTE(review): newer C libraries may already declare gettid() in <unistd.h>;
 * this definition is kept non-static so it stays compatible with such a
 * declaration -- confirm against the target libc.
 */
pid_t gettid(void)
{
    return (pid_t)syscall(__NR_gettid);
}

/* Start-of-measurement snapshot: TSC value plus wall-clock time. */
struct stopwatch {
    __u64 tsc;
    struct timeval tv;
};

/*
 * Restrict the calling thread to CPU `proc_num`.
 * Returns 0 on success; on failure prints the sched_setaffinity error and
 * returns -1.
 */
static int pin_to_cpu(int proc_num)
{
    cpu_set_t set;

    CPU_ZERO(&set);
    CPU_SET(proc_num, &set);
    if (sched_setaffinity(gettid(), sizeof(cpu_set_t), &set)) {
        perror("sched_setaffinity");
        return -1;
    }
    return 0;
}

/* Record the starting TSC value, then the starting wall-clock time. */
static void stopwatch_start(struct stopwatch *sw)
{
    sw->tsc = rdtsc();
    gettimeofday(&sw->tv, NULL);
}

/*
 * Take the end-of-measurement readings (wall clock first, then TSC, mirroring
 * the start order) and print one result line on stderr.
 * `label` names the increment technique that was timed.
 */
static void stopwatch_report(const struct stopwatch *sw, int proc_num,
                             const char *label)
{
    struct timeval tv_end;
    __u64 tsc_end;
    long long usec;

    gettimeofday(&tv_end, NULL);
    tsc_end = rdtsc();

    usec = (long long)(tv_end.tv_sec - sw->tv.tv_sec) * 1000000
         + (long long)(tv_end.tv_usec - sw->tv.tv_usec);

    /* __u64 is not "unsigned long long" on every architecture, so cast. */
    fprintf(stderr, "proc_num :%d,%s cost %llu CPU cycle,cost %llu us\n",
            proc_num, label,
            (unsigned long long)(tsc_end - sw->tsc),
            (unsigned long long)usec);
}

/*
 * Phase 0 worker: INC_TO atomic increments of global_int0.
 * `arg` carries the CPU index to pin to, smuggled through the pointer value.
 * Always returns NULL.
 */
void *thread_routine0(void *arg)
{
    int proc_num = (int)(long)arg;
    struct stopwatch sw;
    int i;

    if (pin_to_cpu(proc_num) != 0)
        return NULL;

    stopwatch_start(&sw);
    for (i = 0; i < INC_TO; i++) {
        __sync_fetch_and_add(&global_int0, 1);
    }
    stopwatch_report(&sw, proc_num, "__sync_fetch_and_add");
    return NULL;
}

/*
 * Phase 1 worker: INC_TO plain increments of global_int1 with no
 * synchronisation.  The data race is deliberate -- it is what the benchmark
 * demonstrates -- so updates are expected to be lost.
 * NOTE(review): an optimising compiler may collapse this loop; the published
 * numbers were presumably obtained without optimisation -- confirm.
 * `arg` carries the CPU index to pin to.  Always returns NULL.
 */
void *thread_routine1(void *arg)
{
    int proc_num = (int)(long)arg;
    struct stopwatch sw;
    int i;

    if (pin_to_cpu(proc_num) != 0)
        return NULL;

    stopwatch_start(&sw);
    for (i = 0; i < INC_TO; i++) {
        global_int1++;
    }
    /* Label corrected: this routine does not use __sync_fetch_and_add. */
    stopwatch_report(&sw, proc_num, "plain increment");
    return NULL;
}

/*
 * Phase 2 worker: INC_TO increments of global_int2, each one performed while
 * holding count_lock.
 * `arg` carries the CPU index to pin to.  Always returns NULL.
 */
void *thread_routine2(void *arg)
{
    int proc_num = (int)(long)arg;
    struct stopwatch sw;
    int i;

    if (pin_to_cpu(proc_num) != 0)
        return NULL;

    stopwatch_start(&sw);
    for (i = 0; i < INC_TO; i++) {
        pthread_mutex_lock(&count_lock);
        global_int2++;
        pthread_mutex_unlock(&count_lock);
    }
    stopwatch_report(&sw, proc_num, "pthread lock");
    return NULL;
}

/*
 * Start up to `procs` threads running `routine` (thread i receives i as its
 * argument) and wait for all of them.
 * Returns the number of threads actually started; this is smaller than
 * `procs` only if pthread_create failed part-way through.
 */
static int run_phase(pthread_t *thrs, int procs, void *(*routine)(void *))
{
    int started = procs;
    int i;

    for (i = 0; i < procs; i++) {
        /* pthread_create returns the error number; it does not set errno. */
        int rc = pthread_create(&thrs[i], NULL, routine, (void *)(long)i);

        if (rc != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            started = i;
            break;
        }
    }

    for (i = 0; i < started; i++)
        pthread_join(thrs[i], NULL);

    return started;
}

/*
 * Print the result banner for one phase: the counter value reached, the
 * value expected from `procs` threads, and the ratio as a percentage.
 */
static void print_summary(const char *title, int actual, int procs)
{
    int expected = INC_TO * procs;
    /* Avoid 0/0 when no thread could be started. */
    double rate = expected > 0
                      ? (double)actual / (double)expected * 100
                      : 0.0;

    printf("===== %s =====\n", title);
    printf("After doing all the math, global_int value is: %d\n", actual);
    printf("Expected value is: %d\n", expected);
    printf("Correct rate: %f%%\n\n", rate);
}

/*
 * Run the three phases in order, one thread per online CPU, printing a
 * summary after each.  If thread creation fails in a phase, later phases use
 * the reduced thread count.
 * Returns 0 on success, -1 if the CPU count or the thread array could not be
 * obtained.
 */
int main(void)
{
    pthread_t *thrs;
    long online;
    int procs;

    // Getting number of CPUs
    errno = 0;
    online = sysconf(_SC_NPROCESSORS_ONLN);
    if (online < 1) {
        if (errno != 0)
            perror("sysconf");
        else
            fprintf(stderr, "sysconf: no online processors reported\n");
        return -1;
    }
    procs = (int)online;

    thrs = malloc(sizeof *thrs * (size_t)procs);
    if (thrs == NULL) {
        perror("malloc");
        return -1;
    }

    printf("Starting %d threads...\n", procs);

    procs = run_phase(thrs, procs, thread_routine0);
    print_summary("use automic method", global_int0, procs);

    procs = run_phase(thrs, procs, thread_routine1);
    print_summary("don't use pthread mutex", global_int1, procs);

    procs = run_phase(thrs, procs, thread_routine2);
    print_summary("use pthread mutex", global_int2, procs);

    free(thrs);
    return 0;
}

/*
 * Sample run published with the original article (4 CPUs).  In this captured
 * run the second group of per-thread lines is labelled __sync_fetch_and_add
 * even though it timed the plain increment; that label is corrected in
 * thread_routine1 above.
 *
 * jwwang@jwwang:~/test$ ./a.out
 * Starting 4 threads...
 * proc_num :2,__sync_fetch_and_add cost 201870083 CPU cycle,cost 74471 us
 * proc_num :1,__sync_fetch_and_add cost 216505929 CPU cycle,cost 79872 us
 * proc_num :0,__sync_fetch_and_add cost 221643287 CPU cycle,cost 81767 us
 * proc_num :3,__sync_fetch_and_add cost 202669487 CPU cycle,cost 74767 us
 * ===== use automic method =====
 * After doing all the math, global_int value is: 4000000
 * Expected value is: 4000000
 * Correct rate: 100.000000%
 *
 * proc_num :0,__sync_fetch_and_add cost 22410118 CPU cycle,cost 8267 us
 * proc_num :1,__sync_fetch_and_add cost 29147387 CPU cycle,cost 10752 us
 * proc_num :3,__sync_fetch_and_add cost 29398320 CPU cycle,cost 10845 us
 * proc_num :2,__sync_fetch_and_add cost 11182123 CPU cycle,cost 4124 us
 * ===== don't use pthread mutex =====
 * After doing all the math, global_int value is: 1789149
 * Expected value is: 4000000
 * Correct rate: 44.728725%
 *
 * proc_num :0,pthread lock cost 875600915 CPU cycle,cost 323023 us
 * proc_num :3,pthread lock cost 905362503 CPU cycle,cost 334003 us
 * proc_num :1,pthread lock cost 926347504 CPU cycle,cost 341745 us
 * proc_num :2,pthread lock cost 938705022 CPU cycle,cost 346304 us
 * ===== use pthread mutex =====
 * After doing all the math, global_int value is: 4000000
 * Expected value is: 4000000
 * Correct rate: 100.000000%
 *
 * Author's conclusions:
 *
 * 1. Without any locking the program does not produce the correct result.
 *    In the run above the correct total is 4,000,000 but the actual value
 *    was 1,789,149.  The exact number differs from machine to machine, but
 *    the result will certainly be wrong.
 * 2. Both the thread mutex and the atomic increment produce the correct
 *    result.
 * 3. In terms of performance, __sync_fetch_and_add beats the thread mutex by
 *    a wide margin.  In this run it is about 4 times as fast as the mutex.
 *    Results vary from machine to machine, but it is certainly faster than
 *    the mutex.
 */