dpdk helloworld代码分析

来源:互联网 发布:sql2000还原数据库失败 编辑:程序博客网 时间:2024/06/07 07:22

http://www.cnblogs.com/chanwai1219/p/3632263.html

dpdk helloworld代码分析

复制代码
/*
 * Entry point of the helloworld example.
 *
 * Initialises the EAL from the command line, launches lcore_hello() on
 * every slave lcore, runs it once on the master lcore as well, and then
 * waits for all lcores to finish.
 *
 * Returns 0 on completion. If rte_eal_init() reports an error (negative
 * return value) rte_panic() is called with "Cannot init EAL".
 */
int
MAIN(int argc, char **argv)
{
    int ret;
    unsigned lcore_id;

    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_panic("Cannot init EAL\n");

    /* call lcore_hello() on every slave lcore */
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        rte_eal_remote_launch(lcore_hello, NULL, lcore_id);
    }

    /* call it on master lcore too */
    lcore_hello(NULL);

    rte_eal_mp_wait_lcore();
    return 0;
}
复制代码

 

程序的流程如下图所示:

image

 

 

代码首先初始化了Environment Abstraction Layer(EAL)。EAL主要提供以下功能:

复制代码
• Intel® DPDK loading and launching
• Support for multi-process and multi-thread execution types
• Core affinity/assignment procedures
• System memory allocation/de-allocation
• Atomic/lock operations
• Time reference
• PCI bus access
• Trace and debug functions
• CPU feature identification
• Interrupt handling
• Alarm operations
复制代码

 

num_pages

复制代码
/* Launch threads, called at application init(). */intrte_eal_init(int argc, char **argv){    int i, fctret, ret;    pthread_t thread_id;    static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);    struct shared_driver *solib = NULL;    const char *logid;    /* 只允许运行一次 */    if (!rte_atomic32_test_and_set(&run_once))        return -1;    logid = strrchr(argv[0], '/');    logid = strdup(logid ? logid + 1: argv[0]);    thread_id = pthread_self();    if (rte_eal_log_early_init() < 0)        rte_panic("Cannot init early logs\n");    /* 获取系统中的CPU数量 */    if (rte_eal_cpu_init() < 0)        rte_panic("Cannot detect lcores\n");    /* 根据命令行参数初始化internal_config */    fctret = eal_parse_args(argc, argv);    if (fctret < 0)        exit(1);    /* 初始化系统中hugepage种类以及数量信息到internal_config.hugepage_info,用于后续内存初始化 */    if (internal_config.no_hugetlbfs == 0 &&            internal_config.process_type != RTE_PROC_SECONDARY &&            internal_config.xen_dom0_support == 0 &&            eal_hugepage_info_init() < 0)        rte_panic("Cannot get hugepage information\n");    /* 获取系统中所有hugepage内存大小,计算方法hugepage_sz*num_pages */    if (internal_config.memory == 0 && internal_config.force_sockets == 0) {        if (internal_config.no_hugetlbfs)            internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;        else            internal_config.memory = eal_get_hugepage_mem_size();    }    if (internal_config.vmware_tsc_map == 1) {#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT        rte_cycles_vmware_tsc_map = 1;        RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "                "you must have monitor_control.pseudo_perfctr = TRUE\n");#else        RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "                "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");#endif    }    rte_srand(rte_rdtsc());    /* 在/var/run或者用户的home目录创建.rte_config文件用于存储内存配置信息(rte_mem_config结构)如果process type为RTE_PROC_SECONDARY则等待PRIMARY完成内存初始化 */    rte_config_init();    /* 请求IO权限 */    if 
(rte_eal_iopl_init() == 0)        rte_config.flags |= EAL_FLG_HIGH_IOPL;        /* 扫描系统中所有的PCI设备,并创建对应的device结构链到device_list中 */    if (rte_eal_pci_init() < 0)        rte_panic("Cannot init PCI\n");#ifdef RTE_LIBRTE_IVSHMEM    if (rte_eal_ivshmem_init() < 0)        rte_panic("Cannot init IVSHMEM\n");#endif    /* 初始化rte_config->mem_config,并映射hugepage到挂载目录下的文件rte_map* */    if (rte_eal_memory_init() < 0)        rte_panic("Cannot init memory\n");    /* the directories are locked during eal_hugepage_info_init */    eal_hugedirs_unlock();        /* memzone可用内存初始化 */    if (rte_eal_memzone_init() < 0)        rte_panic("Cannot init memzone\n");    /* memconfig链表初始化 */    if (rte_eal_tailqs_init() < 0)        rte_panic("Cannot init tail queues for objects\n");#ifdef RTE_LIBRTE_IVSHMEM    if (rte_eal_ivshmem_obj_init() < 0)        rte_panic("Cannot init IVSHMEM objects\n");#endif    if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)        rte_panic("Cannot init logs\n");    /* 告警? 
具体内容待分析 */    if (rte_eal_alarm_init() < 0)        rte_panic("Cannot init interrupt-handling thread\n");    /* 创建与收包驱动通信用管道并初始化中断处理线程 */    if (rte_eal_intr_init() < 0)        rte_panic("Cannot init interrupt-handling thread\n");    /* 定时器 */    if (rte_eal_timer_init() < 0)        rte_panic("Cannot init HPET or TSC timers\n");    /* 检查master core所在socket是否有内存 */    eal_check_mem_on_local_socket();    /* 标记初始化完成 */    rte_eal_mcfg_complete();    /* 白名单内设备初始化 */    if (rte_eal_non_pci_ethdev_init() < 0)        rte_panic("Cannot init non-PCI eth_devs\n");    /* 动态链接库 */    TAILQ_FOREACH(solib, &solib_list, next) {        solib->lib_handle = dlopen(solib->name, RTLD_NOW);        if ((solib->lib_handle == NULL) && (solib->name[0] != '/')) {            /* relative path: try again with "./" prefix */            char sopath[PATH_MAX];            snprintf(sopath, sizeof(sopath), "./%s", solib->name);            solib->lib_handle = dlopen(sopath, RTLD_NOW);        }        if (solib->lib_handle == NULL)            RTE_LOG(WARNING, EAL, "%s\n", dlerror());    }    RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",        rte_config.master_lcore, (int)thread_id);    /* 创建lcore的主线程 */    RTE_LCORE_FOREACH_SLAVE(i) {        /*         * create communication pipes between master thread         * and children         */        if (pipe(lcore_config[i].pipe_master2slave) < 0)            rte_panic("Cannot create pipe\n");        if (pipe(lcore_config[i].pipe_slave2master) < 0)            rte_panic("Cannot create pipe\n");        lcore_config[i].state = WAIT;        /* create a thread for each lcore */        ret = pthread_create(&lcore_config[i].thread_id, NULL,                     eal_thread_loop, NULL);        if (ret != 0)            rte_panic("Cannot create thread\n");    }    /* master线程绑定CPU */    eal_thread_init_master(rte_config.master_lcore);    /*     * Launch a dummy function on all slave lcores, so that master lcore     * knows they are all ready when this 
function returns.     */    /* 通知lcore开始调用loop */    rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);    rte_eal_mp_wait_lcore();    return fctret;}
复制代码

 

下面主要分析一下内存的初始化过程

 

rte_eal_memory_init根据进程类型选择初始化方式:process type为PRIMARY时调用rte_eal_hugepage_init,为SECONDARY时调用rte_eal_hugepage_attach。

复制代码
/*
 * init memory subsystem
 *
 * A primary process sets up the hugepage mappings itself through
 * rte_eal_hugepage_init(); any other process type attaches through
 * rte_eal_hugepage_attach(). Unless internal_config.no_shconf is set,
 * rte_eal_memdevice_init() is run afterwards.
 *
 * Returns 0 on success, -1 if either step reports a negative result.
 */
int
rte_eal_memory_init(void)
{
    RTE_LOG(INFO, EAL, "Setting up memory...\n");
    const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
            rte_eal_hugepage_init() :
            rte_eal_hugepage_attach();
    if (retval < 0)
        return -1;

    if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
        return -1;

    return 0;
}
复制代码

 

复制代码
/* * Prepare physical memory mapping: fill configuration structure with * these infos, return 0 on success. *  1. map N huge pages in separate files in hugetlbfs *  2. find associated physical addr *  3. find associated NUMA socket ID *  4. sort all huge pages by physical address *  5. remap these N huge pages in the correct order *  6. unmap the first mapping *  7. fill memsegs in configuration with contiguous zones */static intrte_eal_hugepage_init(void){    struct rte_mem_config *mcfg;    struct hugepage_file *hugepage, *tmp_hp = NULL;    struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];    uint64_t memory[RTE_MAX_NUMA_NODES];    unsigned hp_offset;    int i, j, new_memseg;    int nr_hugefiles, nr_hugepages = 0;    void *addr;#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS    int new_pages_count[MAX_HUGEPAGE_SIZES];#endif    memset(used_hp, 0, sizeof(used_hp));    /* get pointer to global configuration */    mcfg = rte_eal_get_configuration()->mem_config;    /* hugetlbfs can be disabled */    if (internal_config.no_hugetlbfs) {        /* 对于不使用hugetlbfs的直接使用堆内存 */        addr = malloc(internal_config.memory);        mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;        mcfg->memseg[0].addr = addr;        mcfg->memseg[0].len = internal_config.memory;        mcfg->memseg[0].socket_id = SOCKET_ID_ANY;        return 0;    }/* check if app runs on Xen Dom0 */    if (internal_config.xen_dom0_support) {#ifdef RTE_LIBRTE_XEN_DOM0        /* use dom0_mm kernel driver to init memory */        if (rte_xen_dom0_memory_init() < 0)            return -1;        else            return 0;#endif    }    /* calculate total number of hugepages available. 
at this point we haven't     * yet started sorting them so they all are on socket 0 */    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {        /* meanwhile, also initialize used_hp hugepage sizes in used_hp */        used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;        nr_hugepages += internal_config.hugepage_info[i].num_pages[0];    }    /* tmp_hp为hugepage的控制块 */    /*     * allocate a memory area for hugepage table.     * this isn't shared memory yet. due to the fact that we need some     * processing done on these pages, shared memory will be created     * at a later stage.     */    tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));    if (tmp_hp == NULL)        goto fail;    memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));    hp_offset = 0; /* where we start the current page size entries */    /* map all hugepages and sort them */    for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){        struct hugepage_info *hpi;        /*         * we don't yet mark hugepages as used at this stage, so         * we just map all hugepages available to the system         * all hugepages are still located on socket 0         */        hpi = &internal_config.hugepage_info[i];        if (hpi->num_pages[0] == 0)            continue;        /* 把所有hugepage映射进内存 */        /* map all hugepages available */        if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){            RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",                    (unsigned)(hpi->hugepage_sz / 0x100000));            goto fail;        }        /* 记录每一片hugepage的物理内存 */        /* find physical addresses and sockets for each hugepage */        if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){            RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",                    (unsigned)(hpi->hugepage_sz / 0x100000));            goto fail;        }        /* 记录每一个片hugepage的socket id */        if 
(find_numasocket(&tmp_hp[hp_offset], hpi) < 0){            RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",                    (unsigned)(hpi->hugepage_sz / 0x100000));            goto fail;        }        /* 控制块按照物理地址从小到大排序 */        if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0)            goto fail;#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS        /* remap all hugepages into single file segments */        new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);        if (new_pages_count[i] < 0){            RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",                    (unsigned)(hpi->hugepage_sz / 0x100000));            goto fail;        }        /* we have processed a num of hugepages of this size, so inc offset */        hp_offset += new_pages_count[i];#else        /* 连续的物理内存hugepage找到对应连续的虚拟内存空间重新映射 */        /* remap all hugepages */        if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){            RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",                    (unsigned)(hpi->hugepage_sz / 0x100000));            goto fail;        }        /* 删除第一次不连续的映射 */        /* unmap original mappings */        if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0)            goto fail;        /* we have processed a num of hugepages of this size, so inc offset */        hp_offset += hpi->num_pages[0];#endif    }#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS    nr_hugefiles = 0;    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {        nr_hugefiles += new_pages_count[i];    }#else    nr_hugefiles = nr_hugepages;#endif    /* 所有socket的内存清零 */    /* clean out the numbers of pages */    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)        for (j = 0; j < RTE_MAX_NUMA_NODES; j++)            internal_config.hugepage_info[i].num_pages[j] = 0;    /* 重新计算每个socket对应size的内存 */    /* get hugepages for each socket */    for (i = 0; i < nr_hugefiles; i++) {        int socket = tmp_hp[i].socket_id;        
/* find a hugepage info with right size and increment num_pages */        for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) {            if (tmp_hp[i].size ==                    internal_config.hugepage_info[j].hugepage_sz) {#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS                    internal_config.hugepage_info[j].num_pages[socket] +=                        tmp_hp[i].repeated;#else                internal_config.hugepage_info[j].num_pages[socket]++;#endif            }        }    }    /* make a copy of socket_mem, needed for number of pages calculation */    for (i = 0; i < RTE_MAX_NUMA_NODES; i++)        memory[i] = internal_config.socket_mem[i];    /* 把每个socket内存的情况写入used_hp,并返回所有hugepage页数 */    /* calculate final number of pages */    nr_hugepages = calc_num_pages_per_socket(memory,            internal_config.hugepage_info, used_hp,            internal_config.num_hugepage_sizes);    /* error if not enough memory available */    if (nr_hugepages < 0)        goto fail;    /* reporting in! */    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {        for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {            if (used_hp[i].num_pages[j] > 0) {                RTE_LOG(INFO, EAL,                        "Requesting %u pages of size %uMB"                        " from socket %i\n",                        used_hp[i].num_pages[j],                        (unsigned)                            (used_hp[i].hugepage_sz / 0x100000),                        j);            }        }    }    /* 创建可通过文件定位到的共享hugepage控制块 */    /* create shared memory */    hugepage = create_shared_memory(eal_hugepage_info_path(),            nr_hugefiles * sizeof(struct hugepage_file));    if (hugepage == NULL) {        RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");        goto fail;    }    memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));    /* 根据used_hp的内容,每个socket内存只映射used_hp[i].num_pages[socket],剩下的unmap,什么时候会出现这种情况呢? 
*/    /*     * unmap pages that we won't need (looks at used_hp).     * also, sets final_va to NULL on pages that were unmapped.     */    if (unmap_unneeded_hugepages(tmp_hp, used_hp,            internal_config.num_hugepage_sizes) < 0) {        RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");        goto fail;    }    /* 最终结果的hugepage控制块复制到共享内存中 */    /*     * copy stuff from malloc'd hugepage* to the actual shared memory.     * this procedure only copies those hugepages that have final_va     * not NULL. has overflow protection.     */    if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,            tmp_hp, nr_hugefiles) < 0) {        RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");        goto fail;    }    /* free the temporary hugepage table */    free(tmp_hp);    tmp_hp = NULL;    /* find earliest free memseg - this is needed because in case of IVSHMEM,     * segments might have already been initialized */    for (j = 0; j < RTE_MAX_MEMSEG; j++)        if (mcfg->memseg[j].addr == NULL) {            /* move to previous segment and exit loop */            j--;            break;        }    for (i = 0; i < nr_hugefiles; i++) {        new_memseg = 0;        /* if this is a new section, create a new memseg */        if (i == 0)            new_memseg = 1;        else if (hugepage[i].socket_id != hugepage[i-1].socket_id)            new_memseg = 1;        else if (hugepage[i].size != hugepage[i-1].size)            new_memseg = 1;        else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=            hugepage[i].size)            new_memseg = 1;        else if (((unsigned long)hugepage[i].final_va -            (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)            new_memseg = 1;        /* 物理地址连续且虚拟地址连续的内存块为一个segment */        if (new_memseg) {            j += 1;            if (j == RTE_MAX_MEMSEG)                break;            mcfg->memseg[j].phys_addr = hugepage[i].physaddr;            mcfg->memseg[j].addr 
= hugepage[i].final_va;#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS            mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;#else            mcfg->memseg[j].len = hugepage[i].size;#endif            mcfg->memseg[j].socket_id = hugepage[i].socket_id;            mcfg->memseg[j].hugepage_sz = hugepage[i].size;        }        /* continuation of previous memseg */        else {            mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;        }        hugepage[i].memseg_id = j;    }    if (i < nr_hugefiles) {        RTE_LOG(ERR, EAL, "Can only reserve %d pages "            "from %d requested\n"            "Current %s=%d is not enough\n"            "Please either increase it or request less amount "            "of memory.\n",            i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),            RTE_MAX_MEMSEG);        return (-ENOMEM);    }    return 0;fail:    if (tmp_hp)        free(tmp_hp);    return -1;}
复制代码
0 0
原创粉丝点击