Linux PCI Express 配置空间读写内核实现

来源:互联网 发布:锐思金融数据库收费吗 编辑:程序博客网 时间:2024/05/16 19:29

Linux PCI Express

配置空间读写内核实现

 

1     PCI及PCI-E配置空间介绍

   PCI-E是用来互联如计算和通信平台应用中外围设备的第三代高性能I/O总线。PCI-E采用了与PCI相同的使用模型和读写(load-store)通信模型,支持各种常见的事务,如存储器读/写、IO读/写和配置读/写事务。其存储器、IO和配置地址空间与PCI的地址空间相同。PCI Express与PCI系统是软件向后兼容的。

PCI-E的配置空间大小为4096字节,如图1所示。其中前256字节是与PCI兼容的配置寄存器,该区域可以用以下两种机制访问:

·        PCI配置访问机制。

·        PCI Express增强型配置机制。

图1 PCI-E配置空间

 

Memory-mapped I/O (MMIO)与port I/O

   MMIO和port I/O(也称为port-mapped I/O或PMIO)是两种CPU与外设之间进行I/O操作的方式。

 

Port I/O是通过特殊的CPU指令来进行I/O操作,在x86架构上,可以通过指令in和out在特定的端口上进行I/O读写。I/O设备拥有与内存不同的地址空间,实现的方式是通过在CPU上额外的I/O pin或者将整个总线赋予端口。

 

MMIO即内存映射I/O,它是PCI规范的一部分,I/O设备被放置在内存空间而不是I/O空间。从处理器角度看,内存映射I/O后系统设备访问起来和内存一样。这样访问AGP/PCI-E显卡上的帧缓存、BIOS、PCI设备就可以使用与读写内存一样的汇编指令完成,简化了程序设计的难度和接口的复杂性。

 

 对软件人员来说,MMIO比Port I/O更方便使用。

2     PCI-E配置空间读写在内核的实现

   用户空间可以通过lspci和setpci两个命令来查看/修改PCI及PCI-E配置空间。用户命令执行的结果,最终由内核决定。那么我们关心一个问题:内核是如何真正去读取和修改配置空间的?

2.1     内核API接口

   Linux内核提供了以下PCI/PCI-E配置空间访问接口,在驱动编写过程中,我们可以直接使用下面这些函数:

·        pci_{read,write}_config_byte() 

·        pci_{read,write}_config_word()

·        pci_{read,write}_config_dword()

 

   函数的定义在文件include/linux/pci.h中。

00513:static inline int pci_read_config_byte(structpci_dev *dev,int  where,

00513:  u8*val)

00514: {

00515:         return pci_bus_read_config_byte(dev->bus, dev->devfn, where,val);

00516: }

00517:static inline int pci_read_config_word(structpci_dev *dev,int  where,

00517: u16*val)

00518: {

00519:         return pci_bus_read_config_word(dev->bus, dev->devfn, where,val);

00520: }

00521: staticinline int pci_read_config_dword(structpci_dev *dev,

00521:  int where,u32*val)

00522: {

00523:         return pci_bus_read_config_dword(dev->bus, dev->devfn, where,val);

00524: }

00525:static inline int pci_write_config_byte(structpci_dev *dev,int  where,

00525:  u8val)

00526: {

00527:         return pci_bus_write_config_byte(dev->bus, dev->devfn, where,val);

00528: }

00529:static inline int pci_write_config_word(structpci_dev *dev,

00529: int where,u16 val)

00530: {

00531:         return pci_bus_write_config_word(dev->bus, dev->devfn, where,val);

00532: }

00533: staticinline int pci_write_config_dword(structpci_dev *dev,

00533: int where,u32 val)

00534: {

00535:         return pci_bus_write_config_dword(dev->bus, dev->devfn, where,val);

00536: }

 

2.2     内核API实现

   在PCI/PCI-E 配置空间读写API接口中,我们看到它们是对pci_bus_{read,write}_config_{byte, word, dword}的封装。这些函数在drivers/pci/access.c中以宏的方式定义。

00024: #definePCI_OP_READ(size,type,len)\

00025: intpci_bus_read_config_##size\

00026:         (struct pci_bus *bus, unsignedint devfn, int pos , type*value)                      \

00027: {                                                                           \

00028:         int res ;                                                        \

00029:         unsigned long flags;                                                      \

00030:         u32 data = 0;                                                               \

00031:         if (PCI_##size##_BAD)return PCIBIOS_BAD_REGISTER_NUMBER;                             \

00032:         spin_lock_irqsave(&pci_lock,flags);                                            \

00033:         res =bus ->ops- >read(bus, devfn,pos ,len,&data );                    \

00034:         *value = (type)data;                                                   \

00035:         spin_unlock_irqrestore(&pci_lock,flags);                                   \

00036:         return res ;                                                          \

00037: }

00038:

00039: #definePCI_OP_WRITE(size,type,len)\

00040: intpci_bus_write_config_##size\

00041:         (struct pci_bus *bus, unsignedint devfn, int pos , typevalue)                        \

00042: {                                                                           \

00043:         int res ;                                                        \

00044:         unsigned long flags;                                                      \

00045:         if (PCI_##size##_BAD)return PCIBIOS_BAD_REGISTER_NUMBER;                             \

00046:         spin_lock_irqsave(&pci_lock,flags);                                            \

00047:         res =bus ->ops- >write(bus, devfn,pos ,len,value);                       \

00048:         spin_unlock_irqrestore(&pci_lock,flags);                                   \

00049:         return res ;                                                          \

00050: }

 

00059: EXPORT_SYMBOL(pci_bus_read_config_byte);

00060: EXPORT_SYMBOL(pci_bus_read_config_word);

00061: EXPORT_SYMBOL(pci_bus_read_config_dword);

00062: EXPORT_SYMBOL(pci_bus_write_config_byte);

00063: EXPORT_SYMBOL(pci_bus_write_config_word);

00064: EXPORT_SYMBOL(pci_bus_write_config_dword);

 

pci_bus_{read,write}_config_{byte, word, dword}()等函数,调用的是bus->ops->write、bus->ops->read方法。显然,现在的bus总线是PCI/PCI-E,我们就关注内核定义PCI/PCI-E总线的读写操作方法。

注:Linux内核没有专门将PCI-E列为一种总线,而是将PCI-E合并到PCI总线中。

 

2.3     PCI总线读写方法

  PCI总线读写方法为pci_root_ops,对应的读写函数分别为pci_read()、pci_write()。实现在文件arch/i386/pci/common.c中。

00036: staticintpci_read(structpci_bus *bus,unsigned intdevfn,int  where,int  size,u32

00036:  *value)

00037: {

00038:         return raw_pci_ops- >read(pci_domain_nr(bus),bus->number,

00039:                                   devfn,where,size,value);

00040: }

00041:

00042: staticintpci_write(structpci_bus *bus,unsigned intdevfn,int  where,int  size,

00042: u32 value)

00043: {

00044:         return raw_pci_ops- >write(pci_domain_nr(bus),bus->number,

00045:                                    devfn,where,size,value);

00046: }

00047:

00048: structpci_ops pci_root_ops= {

00049:         .read = pci_read,

00050:         .write = pci_write,

00051: };

 

pci_read()、pci_write()依赖于raw_pci_ops全局变量。

 

2.3.1    raw_pci_ops全局变量的设置

   内核在启动时,会执行pci_access_init()函数,在文件arch/i386/pci/init.c中。该函数中,确定了raw_pci_ops值。

00005: /* arch_initcall has too random ordering, so call the initializers

00006:   in the right sequence from here. */

00007: static__init int  pci_access_init(void)

00008: {

00009: #ifdefCONFIG_PCI_MMCONFIG

00010:         pci_mmcfg_init();

00011: #endif

00012:         dmi_check_pciprobe();

00013:

00014:         if (raw_pci_ops)

00015:                return 0;

00016:

00017: #ifdefCONFIG_PCI_BIOS

00018:         pci_pcbios_init();

00019: #endif

00020:         /*

00021:         * don't check for raw_pci_ops here because we want pcbios as last

00022:         * fallback, yet it's needed to run first to set pcibios_last_bus

00023:         * in case legacy PCI probing is used. otherwise detecting peer busses

00024:         * fails.

00025:         */

00026: #ifdefCONFIG_PCI_DIRECT

00027:         pci_direct_init();

00028: #endif

00029:         return0;

00030: }?  end pci_access_init ?

00031:  arch_initcall(pci_access_init);

 

对于访问PCI空间,通过Port I/O方式则可以实现完全访问。但要访问全部的PCI-E配置空间,则需要MMIO方式。MMIO方式访问,则需要Linux内核支持。在编译内核时,选中以下选项即可。

  Bus options (PCI etc.) ---> 

--- PCI support                                                                              

   [*]   Support mmconfig PCI config space access   

  

即需要选中“Support mmconfig PCI config space access”。 若没有选中该项,则用户通过lspci或setpci命令,访问不到PCI-E的扩展配置空间(第256~4095字节)。

 

为了访问PCI-E扩展配置空间,pci_access_init()函数会调用pci_mmcfg_init()。于是将raw_pci_ops的值设为pci_mmcfg,代码都在文件arch/i386/pci/mmconfig.c中。

00152: void__init  pci_mmcfg_init(void)

00153: {

    ... ...

00173:         raw_pci_ops = &pci_mmcfg;

    ... ...

00176: }?  end pci_mmcfg_init ?

 

00147: staticstruct pci_raw_opspci_mmcfg ={

00148:         .read =   pci_mmcfg_read,

00149:         .write =  pci_mmcfg_write,

00150: };

00151:

 

  若内核中没有选中“Support mmconfig PCI config space access”,则raw_pci_ops方法为:pci_direct_conf1或pci_direct_conf2。通常情况下,使用pci_direct_conf1。代码在文件arch/i386/pci/direct.c中。

00257: void__init  pci_direct_init(void)

00258: {

00259:         struct resource *region,*region2;

        ......

00267:         if (pci_check_type1()){

00268:                printk(KERN_INFO"PCI: Using configuration type 1\n");

00269:                raw_pci_ops = &pci_direct_conf1;

00270:                return;

00271:         }

        ... ...

00284:         if (pci_check_type2()){

00285:                printk(KERN_INFO"PCI: Using configuration type 2\n");

00286:                raw_pci_ops = &pci_direct_conf2;

00287:                return;

00288:         }

00293: }?  end pci_direct_init ?

 

 

00079: structpci_raw_ops pci_direct_conf1= {

00080:         .read =                pci_conf1_read,

00081:         .write =  pci_conf1_write,

00082: };

 

00171: #undefPCI_CONF2_ADDRESS

00172:

00173: staticstruct pci_raw_opspci_direct_conf2 ={

00174:         .read =                pci_conf2_read,

00175:         .write =  pci_conf2_write,

00176: };

 

2.3.2    Port I/O方式访问配置空间

Port I/O方式也称为直接方式访问。

PCI规范规定,直接操作port读取PCI配置信息时,通过CONFIG_ADDRESS和CONFIG_DATA两个寄存器进行。CONFIG_ADDRESS的地址为0xCF8,CONFIG_DATA的地址为0xCFC,两个寄存器都为32bit。这两个地址就是x86架构中对应的I/O端口号。图2为CONFIG_ADDRESS寄存器格式。

图2 CONFIG_ADDRESS寄存器格式

bit31是使能对PCI Bus CONFIG_DATA的访问;

bit 30~24为保留,为只读,访问时返回值为0;

bit 23~16是Bus号;

bit 15~11是设备号;

bit 10~8是功能号;

bit 7~2是配置空间中的寄存器,单位为DWORD。

bit 1~0为只读,读取时返回为0。

 

这样直接访问PCI配置空间时,分为两步:
   第一步是向CONFIG_ADDRESS寄存器(端口0xCF8)写入要读/写的位置;
   
第二步是从CONFIG_DATA寄存器(端口0xCFC)读/写所需要数据。

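   Linux内核对PCI配置空间直接访问的实现函数分别为pci_conf1_read()/pci_conf1_write()和pci_conf2_read()/pci_conf2_write(),分别对应PCI配置访问机制1(内核打印为“configuration type 1”)和配置访问机制2(“configuration type 2”);注意这里的type 1/type 2指的是访问机制,与配置空间头类型Type 0/Type 1不是同一概念。通常情况下使用机制1,这里我们只关注pci_conf1_read()/pci_conf1_write()。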

函数pci_conf1_read()和pci_conf1_write()函数在文件arch/i386/pci/direct.c中。

00017: int pci_conf1_read(unsignedint seg,unsigned intbus,

00018:                            unsigned int  devfn,int  reg,int  len,u32 *value)

00019: {

00020:         unsigned long flags;

00021:

00022:         if ((bus> 255)|| (devfn> 255)|| (reg> 255)){

00023:                *value= - 1;

00024:                return -EINVAL;

00025:         }

00026:

00027:         spin_lock_irqsave(&pci_config_lock, flags);

00028:

00029:         outl(PCI_CONF1_ADDRESS(bus,devfn,reg),0xCF8);

00030:

00031:         switch (len){

00032:         case 1:

00033:                *value= inb(0xCFC+ (reg& 3));

00034:                break;

00035:         case 2:

00036:                *value= inw(0xCFC+ (reg& 2));

00037:                break;

00038:         case 4:

00039:                *value= inl(0xCFC);

00040:                break;

00041:         }

00042:

00043:         spin_unlock_irqrestore(&pci_config_lock, flags);

00044:

00045:         return0;

00046: }?  end pci_conf1_read ?

00047:

00048: int pci_conf1_write(unsignedint seg,unsigned intbus,

00049:                             unsigned int  devfn,int  reg,int  len,u32  value)

00050: {

00051:         unsigned long flags;

00052:

00053:         if ((bus> 255)|| (devfn> 255)|| (reg> 255))

00054:                return -EINVAL;

00056:         spin_lock_irqsave(&pci_config_lock, flags);

00057:

00058:         outl(PCI_CONF1_ADDRESS(bus,devfn,reg),0xCF8);

00059:

00060:         switch (len){

00061:         case 1:

00062:                outb((u8)value,0xCFC + (reg & 3));

00063:                break;

00064:         case 2:

00065:                outw((u16)value,0xCFC + (reg & 2));

00066:                break;

00067:         case 4:

00068:                outl((u32)value,0xCFC);

00069:                break;

00070:         }

00071:

00072:         spin_unlock_irqrestore(&pci_config_lock, flags);

00073:

00074:         return0;

00075: }?  end pci_conf1_write ?

00076:

 

2.3.3    MMIO方式访问配置空间

     Port I/O方式只能访问PCI配置空间,而不能访问PCI-E扩展配置空间(第256~4095字节),此时只能通过MMIO方式。Linux内核中的MMIO实现读/写分别对应函数pci_mmcfg_read()和pci_mmcfg_write()。函数在文件arch/i386/pci/mmconfig.c中。

00071: staticintpci_mmcfg_read(unsignedint seg,unsigned intbus,

00072:                            unsigned int  devfn,int  reg,int  len,u32 *value)

00073: {

00074:         unsigned long flags;

00075:         u32 base;

00076:

00077:         if ((bus> 255)|| (devfn> 255)|| (reg> 4095)){

00078:  err:       *value =- 1;

00079:                return -EINVAL;

00080:         }

00081:

00082:         if (reg< 256)

00083:                return pci_conf1_read(seg,bus,devfn,reg,len,value);

00084:

00085:         base = get_base_addr(seg,bus,devfn);

00086:         if (!base)

00087:                goto err;

00088:

00089:         spin_lock_irqsave(&pci_config_lock, flags);

00090:

00091:         pci_exp_set_dev_base(base,bus,devfn);

00092:

00093:         switch (len){

00094:         case 1:

00095:                *value= mmio_config_readb(mmcfg_virt_addr+ reg);

00096:                break;

00097:         case 2:

00098:                *value= mmio_config_readw(mmcfg_virt_addr+ reg);

00099:                break;

00100:         case 4:

00101:                *value= mmio_config_readl(mmcfg_virt_addr+ reg);

00102:                break;

00103:         }

00104:

00105:         spin_unlock_irqrestore(&pci_config_lock, flags);

00106:

00107:         return0;

00108: }?  end pci_mmcfg_read ?

 

 

00110: staticint pci_mmcfg_write(unsignedint seg,unsigned intbus,

00111:                             unsigned int  devfn,int  reg,int  len,u32  value)

00112: {

00113:         unsigned long flags;

00114:         u32 base;

00115:

00116:         if ((bus> 255)|| (devfn> 255)|| (reg> 4095))

00117:                return -EINVAL;

00118:

00119:         if (reg< 256)

00120:                return pci_conf1_write(seg,bus,devfn,reg,len,value);

00121:

00122:         base = get_base_addr(seg,bus,devfn);

00123:         if (!base)

00124:                return -EINVAL;

00125:

00126:         spin_lock_irqsave(&pci_config_lock, flags);

00127:

00128:         pci_exp_set_dev_base(base,bus,devfn);

00129:

00130:         switch (len){

00131:         case 1:

00132:                mmio_config_writeb(mmcfg_virt_addr+ reg,value);

00133:                break;

00134:         case 2:

00135:                mmio_config_writew(mmcfg_virt_addr+ reg,value);

00136:                break;

00137:         case 4:

00138:                mmio_config_writel(mmcfg_virt_addr+ reg,value);

00139:                break;

00140:         }

00141:

00142:         spin_unlock_irqrestore(&pci_config_lock, flags);

00143:

00144:         return0;

00145: }?  end pci_mmcfg_write ?

 

若访问的配置空间在前面256字节范围内,则直接调用直接访问方式(Port I/O)。若访问PCI-E扩展配置空间,则首先通过get_base_addr()函数获取设备对应的内存空间物理地址,然后通过pci_exp_set_dev_base()函数将物理地址映射到一个线性地址,最后通过mmio_config_{read, write}{b, w, l}执行真正的读写。

 

1.   get_base_addr()

00028: /*

00029: * Functions for accessing PCI configuration space with MMCONFIG accesses

00030: */

00031: staticu32get_base_addr(unsignedint seg,int  bus,unsigned devfn)

00032: {

00033:         int cfg_num = - 1;

00034:         struct acpi_table_mcfg_config*cfg;

00035:

00036:         while(1){

00037:                ++cfg_num;

00038:                if (cfg_num>= pci_mmcfg_config_num) {

00039:                         break;

00040:                }

00041:                cfg = &pci_mmcfg_config[cfg_num];

00042:                if (cfg->pci_segment_group_number ! = seg)

00043:                         continue;

00044:                if ((cfg->start_bus_number <= bus)&&

00045:                     (cfg->end_bus_number >= bus))

00046:                         return cfg->base_address;

00047:         }

00048:

00049:         /* Handle more broken MCFG tables on Asus etc.

00050:          They only contain a single entry for bus 0-0. Assume

00051:          this applies to all busses. */

00052:         cfg = &pci_mmcfg_config[0];

00053:         if (pci_mmcfg_config_num== 1 &&

00054:                cfg- >pci_segment_group_number ==0 &&

00055:                (cfg->start_bus_number | cfg->end_bus_number) ==0)

00056:                return cfg->base_address;

00057:

 

全局变量pci_mmcfg_config是所有PCI/PCI-E设备的MMIO映射表。该映射表是内核初始化PCI总线时根据BIOS POST的结果设置好的,内核读取其中已分配的值即可。

 

2.   pci_exp_set_dev_base()

通过get_base_addr()获取到的地址是物理地址,为了能读取,还需通过函数pci_exp_set_dev_base(base, bus, devfn)将物理地址转换为逻辑地址。

00062:static inline void pci_exp_set_dev_base(unsignedint base,int  bus,int

00062:  devfn)

00063: {

00064:         u32 dev_base = base|(bus<< 20) |(devfn<< 12);

00065:         if (dev_base!= mmcfg_last_accessed_device) {

00066:                mmcfg_last_accessed_device  =dev_base;

00067:                set_fixmap_nocache(FIX_PCIE_MCFG,dev_base);

00068:         }

00069: }

 

   文件include/asm-i386/fixmap.h。

00100: /*

00101: * Some hardware wants to get fixmapped without caching.

00102: */

00103: #defineset_fixmap_nocache(idx, phys)\

00104:                __set_fixmap(idx,phys,PAGE_KERNEL_NOCACHE)

00105:

 

   文件arch/i386/mm/pgtable.c。

00140:void  __set_fixmap(enumfixed_addresses idx,unsigned longphys,pgprot_t

00140:  flags)

00141: {

00142:         unsigned long address= __fix_to_virt(idx);

00143:

00144:         if (idx>= __end_of_fixed_addresses){

00145:                BUG();

00146:                return;

00147:         }

00148:         set_pte_pfn(address,phys >>PAGE_SHIFT, flags);

00149: }

00150:

 

00023: #definemmcfg_virt_addr ((void__iomem *)fix_to_virt(FIX_PCIE_MCFG))

 

2.4     用户接口在内核的实现  

前面我们已经提到可以通过lspci和setpci命令来读写PCI/PCI-E配置。而这些命令的实现是基于内核提供的/sysfs接口或/proc接口。

内核为PCI/PCI-E总线提供的/sysfs读写方法如下,文件drivers/pci/pci-sysfs.c。

00510: staticstruct bin_attributepci_config_attr ={

00511:         .attr =        {

00512:         .name = "config",

00513:                .mode = S_IRUGO| S_IWUSR,

00514:                .owner = THIS_MODULE,

00515:         },

00516:         .size =256,

00517:         .read = pci_read_config,

00518:         .write = pci_write_config,

00519: };

00520:

00521: staticstruct bin_attributepcie_config_attr ={

00522:         .attr =        {

00523:                .name = "config",

00524:                .mode = S_IRUGO| S_IWUSR,

00525:                .owner = THIS_MODULE,

00526:         },

00527:         .size =4096,

00528:         .read = pci_read_config,

00529:         .write = pci_write_config,

00530: };

00531:

 

   pci_read_config()和pci_write_config()函数进而调用pci_user_{read,write}_config_{dword, word, byte}。我们来看一下setpci命令执行时的内核栈信息(图3)和lspci命令执行时的内核栈信息(图4)。

由栈信息我们可以看出,函数最终调用的是pci_conf1_write()(图3)和pci_mmcfg_read()(图4)。也就是说,/sysfs提供的读写接口,最终也是使用Port I/O或MMIO方式。

 

图3 pci_conf1_write()函数调用栈

图4 pci_mmcfg_read()函数调用栈


原文:http://www.ilinuxkernel.com/files/5/Linux_PCI_Express_Kernel_RW.htm

0 0
原创粉丝点击