linux libata初始化分析

来源:互联网 发布:php 字符型转变为数值 编辑:程序博客网 时间:2024/05/29 07:13
近来分析libata模块,颇有所感,记录如下,希望能对大家有所帮助,同时也使自己的理解进一步深入。
linux版本:linux-2.6.24.3
注:因完全是个人理解,理解不当难免,恳请批评指正!!!!

大家知道,驱动程序在初始化sata controller并初始化ata_host结构体之后,会调用函数ata_host_activate进入libata的初始化,我们从这里开始分析。

下面是freescale  mpc8315平台的sata驱动代码。

linux/drivers/ata/sata_fsl.c

/*
 * sata_fsl_probe - abridged excerpt of the FSL SATA platform probe routine.
 *
 * Only the steps relevant to libata start-up are shown: allocate the
 * driver-private host data, map the IRQ, allocate the ata_host, initialize
 * the controller and finally hand the host to ata_host_activate().
 * Local declarations, the error_exit_with_cleanup label and the closing
 * brace are not part of this excerpt.
 */
static int sata_fsl_probe(struct of_device *ofdev,
            const struct of_device_id *match)
{
    host_priv = kzalloc(sizeof(struct sata_fsl_host_priv), GFP_KERNEL);
    if (!host_priv)
        goto error_exit_with_cleanup;

    /*
     * NOTE(review): irq_of_parse_and_map() presumably reports failure as 0
     * rather than a negative value, in which case this "< 0" test never
     * fires -- confirm against the OF IRQ API and the declaration of irq.
     */
    irq = irq_of_parse_and_map(ofdev->node, 0);
    if (irq < 0) {
        dev_printk(KERN_ERR, &ofdev->dev, "invalid irq from platform\n");
        goto error_exit_with_cleanup;
    }
    host_priv->irq = irq;

    /* allocate host structure */
    /* NOTE(review): host is dereferenced below without a NULL check */
    host = ata_host_alloc_pinfo(&ofdev->dev, ppi, SATA_FSL_MAX_PORTS);

    /* host->iomap is not used currently */
    host->private_data = host_priv;

    /* initialize host controller */
    sata_fsl_init_controller(host);

    /*
     * Now, register with libATA core, this will also initiate the
     * device discovery process, invoking our port_start() handler &
     * error_handler() to execute a dummy Softreset EH session
     */
    /* NOTE(review): the return value of ata_host_activate() is ignored */
    ata_host_activate(host, irq, sata_fsl_interrupt, SATA_FSL_IRQ_FLAG,
              &sata_fsl_sht);


    /* remember the host on the device so later callbacks can find it */
    dev_set_drvdata(&ofdev->dev, host);

    return 0;


函数ata_host_activate申请了中断,并调用ata_host_register函数注册host

linux/drivers/ata/libata-core.c

/**
 *    ata_host_activate - start host, request IRQ and register it
 *    @host: target ATA host
 *    @irq: IRQ to request; 0 selects polling mode (no IRQ is requested)
 *    @irq_handler: handler used when requesting the IRQ; a WARN_ON
 *        fires if it is non-NULL in polling mode
 *    @irq_flags: flags used when requesting the IRQ
 *    @sht: scsi_host_template to use when registering the host
 *
 *    After allocating an ATA host and initializing it, most libata
 *    LLDs perform three steps to activate the host - start host,
 *    request IRQ and register it.  This helper takes the necessary
 *    arguments and performs the three steps in one go.
 *
 *    LOCKING:
 *    Inherited from calling layer (may sleep).
 *
 *    RETURNS:
 *    0 on success, -errno otherwise.
 */
int ata_host_activate(struct ata_host *host, int irq,
              irq_handler_t irq_handler, unsigned long irq_flags,
              struct scsi_host_template *sht)
{
    int port_no;
    int err;

    err = ata_host_start(host);
    if (err)
        return err;

    if (irq) {
        /* interrupt-driven host: grab the IRQ before registering */
        err = devm_request_irq(host->dev, irq, irq_handler, irq_flags,
                       dev_driver_string(host->dev), host);
        if (err)
            return err;

        /* record the IRQ number in every port's description */
        for (port_no = 0; port_no < host->n_ports; port_no++)
            ata_port_desc(host->ports[port_no], "irq %d", irq);

        err = ata_host_register(host, sht);
        if (err) {
            /* registration failed: release the IRQ, leave ports alone */
            devm_free_irq(host->dev, irq, host);
        }

        return err;
    }

    /* polling mode: no IRQ to request, so no handler is expected */
    WARN_ON(irq_handler);
    return ata_host_register(host, sht);
}


linux/drivers/ata/libata-core.c

/**
 *    ata_host_register - register initialized ATA host
 *    @host: ATA host to register
 *    @sht: template for SCSI host
 *
 *    Register initialized ATA host.  @host is allocated using
 *    ata_host_alloc() and fully initialized by LLD.  This function
 *    starts ports, registers @host with ATA and SCSI layers and
 *    probe registered devices.
 *
 *    LOCKING:
 *    Inherited from calling layer (may sleep).
 *
 *    RETURNS:
 *    0 on success, -errno otherwise.
 */
int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
{
    int i, rc;

    /* host must have been started */
    if (!(host->flags & ATA_HOST_STARTED)) {
        dev_printk(KERN_ERR, host->dev,
               "BUG: trying to register unstarted host\n");
        WARN_ON(1);
        return -EINVAL;
    }

    /* Blow away unused ports.  This happens when LLD can't
     * determine the exact number of ports to allocate at
     * allocation time.
     */
    for (i = host->n_ports; host->ports[i]; i++)
        kfree(host->ports[i]);

    /* give ports names and add SCSI hosts */
    for (i = 0; i < host->n_ports; i++)
        host->ports[i]->print_id = ata_print_id++;

    rc = ata_scsi_add_hosts(host, sht);
    if (rc)
        return rc;

    /* associate with ACPI nodes */
    ata_acpi_associate(host);

    /* set cable, sata_spd_limit and report */
    for (i = 0; i < host->n_ports; i++) {
        struct ata_port *ap = host->ports[i];
        unsigned long xfer_mask;

        /* set SATA cable type if still unset */
        if (ap->cbl == ATA_CBL_NONE && (ap->flags & ATA_FLAG_SATA))
            ap->cbl = ATA_CBL_SATA;

        /* init sata_spd_limit to the current value */
        sata_link_init_spd(&ap->link);

        /* print per-port info to dmesg */
        xfer_mask = ata_pack_xfermask(ap->pio_mask, ap->mwdma_mask,
                          ap->udma_mask);

        if (!ata_port_is_dummy(ap)) {
            ata_port_printk(ap, KERN_INFO,
                    "%cATA max %s %s\n",
                    (ap->flags & ATA_FLAG_SATA) ? 'S' : 'P',
                    ata_mode_string(xfer_mask),
                    ap->link.eh_info.desc);
            ata_ehi_clear_desc(&ap->link.eh_info);
        } else
            ata_port_printk(ap, KERN_INFO, "DUMMY\n");
    }

    /* perform each probe synchronously */
    DPRINTK("probe begin\n");
    for (i = 0; i < host->n_ports; i++) {
        struct ata_port *ap = host->ports[i];
        int rc;

        /* probe */
        if (ap->ops->error_handler) {
            struct ata_eh_info *ehi = &ap->link.eh_info;
            unsigned long flags;

            ata_port_probe(ap);

            /* kick EH for boot probing */
            spin_lock_irqsave(ap->lock, flags);

            ehi->probe_mask =
                (1 << ata_link_max_devices(&ap->link)) - 1;
            ehi->action |= ATA_EH_SOFTRESET;
            ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;

            ap->pflags &= ~ATA_PFLAG_INITIALIZING;
            ap->pflags |= ATA_PFLAG_LOADING;
            ata_port_schedule_eh(ap);

            spin_unlock_irqrestore(ap->lock, flags);

            /* wait for EH to finish */
            ata_port_wait_eh(ap);

        } else {
            DPRINTK("ata%u: bus probe begin\n", ap->print_id);
            rc = ata_bus_probe(ap);
            DPRINTK("ata%u: bus probe end\n", ap->print_id);

            if (rc) {
                /* FIXME: do something useful here?
                 * Current libata behavior will
                 * tear down everything when
                 * the module is removed
                 * or the h/w is unplugged.
                 */
            }
        }
    }

    /* probes are done, now scan each port's disk(s) */
    for (i = 0; i < host->n_ports; i++) {
        struct ata_port *ap = host->ports[i];

        ata_scsi_scan_host(ap, 1);
        ata_lpm_schedule(ap, ap->pm_policy);
    }

    return 0;
}
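ata_scsi_add_hosts函数(经由scsi_host_alloc)启动了scsi_error_handler内核线程;之后在第二部分代码(原文以红色标出,即"perform each probe synchronously"的循环)中,通过ata_port_schedule_eh触发该线程执行,并用ata_port_wait_eh等待它完成,直到初始化完毕;第三部分(最后一个循环,调用ata_scsi_scan_host)主要初始化每个硬盘设备(包括分配硬盘设备节点等)。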

linux/drivers/ata/libata-scsi.c
int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
{
    int i, rc;

    for (i = 0; i < host->n_ports; i++) {
        struct ata_port *ap = host->ports[i];
        struct Scsi_Host *shost;

        rc = -ENOMEM;
        shost = scsi_host_alloc(sht, sizeof(struct ata_port *));
        if (!shost)
            goto err_alloc;

        *(struct ata_port **)&shost->hostdata[0] = ap;
        ap->scsi_host = shost;

        shost->transportt = &ata_scsi_transport_template;
        shost->unique_id = ap->print_id;
        shost->max_id = 16;
        shost->max_lun = 1;
        shost->max_channel = 1;
        shost->max_cmd_len = 16;

        /* Schedule policy is determined by ->qc_defer()
         * callback and it needs to see every deferred qc.
         * Set host_blocked to 1 to prevent SCSI midlayer from
         * automatically deferring requests.
         */
        shost->max_host_blocked = 1;

        rc = scsi_add_host(ap->scsi_host, ap->host->dev);
        if (rc)
            goto err_add;
    }

    return 0;

 err_add:
    scsi_host_put(host->ports[i]->scsi_host);
 err_alloc:
    while (--i >= 0) {
        struct Scsi_Host *shost = host->ports[i]->scsi_host;

        scsi_remove_host(shost);
        scsi_host_put(shost);
    }
    return rc;
}
ata_scsi_add_hosts主要初始化scsi层需要的结构,然后注册到scsi模块,完成scsi与ata的连接。

linux/drivers/scsi/hosts.c

/**
 * scsi_host_alloc - register a scsi host adapter instance.
 * @sht:    pointer to scsi host template
 * @privsize:    extra bytes to allocate for driver
 *
 * Note:
 *     Allocate a new Scsi_Host and perform basic initialization.
 *     The host is not published to the scsi midlayer until scsi_add_host
 *     is called.
 *
 * Return value:
 *     Pointer to a new Scsi_Host
 **/
struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
{
    struct Scsi_Host *shost;
    gfp_t gfp_mask = GFP_KERNEL;
    int rval;

    if (sht->unchecked_isa_dma && privsize)
        gfp_mask |= __GFP_DMA;

    shost = kzalloc(sizeof(struct Scsi_Host) + privsize, gfp_mask);
    if (!shost)
        return NULL;

    shost->host_lock = &shost->default_lock;
    spin_lock_init(shost->host_lock);
    shost->shost_state = SHOST_CREATED;
    INIT_LIST_HEAD(&shost->__devices);
    INIT_LIST_HEAD(&shost->__targets);
    INIT_LIST_HEAD(&shost->eh_cmd_q);
    INIT_LIST_HEAD(&shost->starved_list);
    init_waitqueue_head(&shost->host_wait);

    mutex_init(&shost->scan_mutex);

    shost->host_no = scsi_host_next_hn++; /* XXX(hch): still racy */
    shost->dma_channel = 0xff;

    /* These three are default values which can be overridden */
    shost->max_channel = 0;
    shost->max_id = 8;
    shost->max_lun = 8;

    /* Give each shost a default transportt */
    shost->transportt = &blank_transport_template;

    /*
     * All drivers right now should be able to handle 12 byte
     * commands.  Every so often there are requests for 16 byte
     * commands, but individual low-level drivers need to certify that
     * they actually do something sensible with such commands.
     */
    shost->max_cmd_len = 12;
    shost->hostt = sht;
    shost->this_id = sht->this_id;
    shost->can_queue = sht->can_queue;
    shost->sg_tablesize = sht->sg_tablesize;
    shost->cmd_per_lun = sht->cmd_per_lun;
    shost->unchecked_isa_dma = sht->unchecked_isa_dma;
    shost->use_clustering = sht->use_clustering;
    shost->ordered_tag = sht->ordered_tag;
    shost->active_mode = sht->supported_mode;
    shost->use_sg_chaining = sht->use_sg_chaining;

    if (sht->supported_mode == MODE_UNKNOWN)
        /* means we didn't set it ... default to INITIATOR */
        shost->active_mode = MODE_INITIATOR;
    else
        shost->active_mode = sht->supported_mode;

    if (sht->max_host_blocked)
        shost->max_host_blocked = sht->max_host_blocked;
    else
        shost->max_host_blocked = SCSI_DEFAULT_HOST_BLOCKED;

    /*
     * If the driver imposes no hard sector transfer limit, start at
     * machine infinity initially.
     */
    if (sht->max_sectors)
        shost->max_sectors = sht->max_sectors;
    else
        shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS;

    /*
     * assume a 4GB boundary, if not set
     */
    if (sht->dma_boundary)
        shost->dma_boundary = sht->dma_boundary;
    else
        shost->dma_boundary = 0xffffffff;

    rval = scsi_setup_command_freelist(shost);
    if (rval)
        goto fail_kfree;

    device_initialize(&shost->shost_gendev);
    snprintf(shost->shost_gendev.bus_id, BUS_ID_SIZE, "host%d",
        shost->host_no);
    shost->shost_gendev.release = scsi_host_dev_release;

    class_device_initialize(&shost->shost_classdev);
    shost->shost_classdev.dev = &shost->shost_gendev;
    shost->shost_classdev.class = &shost_class;
    snprintf(shost->shost_classdev.class_id, BUS_ID_SIZE, "host%d",
          shost->host_no);

    shost->ehandler = kthread_run(scsi_error_handler, shost,
            "scsi_eh_%d", shost->host_no);

    if (IS_ERR(shost->ehandler)) {
        rval = PTR_ERR(shost->ehandler);
        goto fail_destroy_freelist;
    }

    scsi_proc_hostdir_add(shost->hostt);
    return shost;

 fail_destroy_freelist:
    scsi_destroy_command_freelist(shost);
 fail_kfree:
    kfree(shost);
    return NULL;
}
EXPORT_SYMBOL(scsi_host_alloc);
scsi_host_alloc执行完,内核即多了一个线程执行scsi_error_handler。ata_scsi_add_hosts继续初始化Scsi_Host结构体,其中:
        shost->transportt = &ata_scsi_transport_template; 这里赋值的模板中的eh_strategy_handler会在scsi_error_handler中被调用。

/**
 * scsi_error_handler - SCSI error handler thread
 * @data:    Host for which we are running.
 *
 * Notes:
 *    This is the main error handling loop.  This is run as a kernel thread
 *    for every SCSI host and handles all error handling activity.
 *
 *    Each pass either goes back to sleep or runs the transport's
 *    eh_strategy_handler (scsi_unjam_host() when the transport has none)
 *    and then restarts operations on the host.
 *
 * Return value:
 *    Always 0, once kthread_should_stop() reports true.
 **/
int scsi_error_handler(void *data)
{
    struct Scsi_Host *shost = data;

    /*
     * We use TASK_INTERRUPTIBLE so that the thread is not
     * counted against the load average as a running process.
     * We never actually get interrupted because kthread_run
     * disables signal delivery for the created thread.
     */
    set_current_state(TASK_INTERRUPTIBLE);
    while (!kthread_should_stop()) {
        /*
         * Sleep when there is nothing to do (no failed command and no
         * EH run scheduled) or while the failed count differs from the
         * busy count.
         */
        if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
            shost->host_failed != shost->host_busy) {
            SCSI_LOG_ERROR_RECOVERY(1,
                printk("Error handler scsi_eh_%d sleeping\n",
                    shost->host_no));
            schedule();
            /* mark ourselves sleeping again before re-checking */
            set_current_state(TASK_INTERRUPTIBLE);
            continue;
        }

        __set_current_state(TASK_RUNNING);
        SCSI_LOG_ERROR_RECOVERY(1,
            printk("Error handler scsi_eh_%d waking up\n",
                shost->host_no));

        /*
         * We have a host that is failing for some reason.  Figure out
         * what we need to do to get it up and online again (if we can).
         * If we fail, we end up taking the thing offline.
         */
        if (shost->transportt->eh_strategy_handler)
            shost->transportt->eh_strategy_handler(shost);
        else
            scsi_unjam_host(shost);

        /*
         * Note - if the above fails completely, the action is to take
         * individual devices offline and flush the queue of any
         * outstanding requests that may have been pending.  When we
         * restart, we restart any I/O to any other devices on the bus
         * which are still online.
         */
        scsi_restart_operations(shost);
        set_current_state(TASK_INTERRUPTIBLE);
    }
    __set_current_state(TASK_RUNNING);

    SCSI_LOG_ERROR_RECOVERY(1,
        printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
    /* the thread is going away: clear the host's reference to it */
    shost->ehandler = NULL;
    return 0;

}



上次写到系统在运行scsi_error_handler线程之前的初始化过程:系统会在libata-core.c的函数ata_host_register中(通过ata_port_wait_eh)等待该线程的执行。现在我们从这个线程开始分析:

linux/drivers/scsi/scsi_error.c
/**
 * scsi_error_handler - SCSI error handler thread
 * @data:    Host for which we are running.
 *
 * Notes:
 *    This is the main error handling loop.  This is run as a kernel thread
 *    for every SCSI host and handles all error handling activity.
 *
 *    Each pass either goes back to sleep or runs the transport's
 *    eh_strategy_handler (scsi_unjam_host() when the transport has none)
 *    and then restarts operations on the host.
 *
 * Return value:
 *    Always 0, once kthread_should_stop() reports true.
 **/
int scsi_error_handler(void *data)
{
    struct Scsi_Host *shost = data;

    /*
     * We use TASK_INTERRUPTIBLE so that the thread is not
     * counted against the load average as a running process.
     * We never actually get interrupted because kthread_run
     * disables signal delivery for the created thread.
     */
    set_current_state(TASK_INTERRUPTIBLE);
    while (!kthread_should_stop()) {
        /*
         * Sleep when there is nothing to do (no failed command and no
         * EH run scheduled) or while the failed count differs from the
         * busy count.
         */
        if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
            shost->host_failed != shost->host_busy) {
            SCSI_LOG_ERROR_RECOVERY(1,
                printk("Error handler scsi_eh_%d sleeping\n",
                    shost->host_no));
            schedule();
            /* mark ourselves sleeping again before re-checking */
            set_current_state(TASK_INTERRUPTIBLE);
            continue;
        }

        __set_current_state(TASK_RUNNING);
        SCSI_LOG_ERROR_RECOVERY(1,
            printk("Error handler scsi_eh_%d waking up\n",
                shost->host_no));

        /*
         * We have a host that is failing for some reason.  Figure out
         * what we need to do to get it up and online again (if we can).
         * If we fail, we end up taking the thing offline.
         */
        if (shost->transportt->eh_strategy_handler)
            shost->transportt->eh_strategy_handler(shost);
        else
            scsi_unjam_host(shost);

        /*
         * Note - if the above fails completely, the action is to take
         * individual devices offline and flush the queue of any
         * outstanding requests that may have been pending.  When we
         * restart, we restart any I/O to any other devices on the bus
         * which are still online.
         */
        scsi_restart_operations(shost);
        set_current_state(TASK_INTERRUPTIBLE);
    }
    __set_current_state(TASK_RUNNING);

    SCSI_LOG_ERROR_RECOVERY(1,
        printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
    /* the thread is going away: clear the host's reference to it */
    shost->ehandler = NULL;
    return 0;
}

shost->transportt->eh_strategy_handler(shost);
 此处是一个函数指针调用,在之前的文章我已经详细说了这个指针的由来,现在我们看一下这个指针的赋值:

linux/drivers/ata/libata-scsi.c

/*
 * libata transport template.  libata doesn't do real transport stuff.
 * It only supplies the three hooks set below: the EH strategy handler
 * (ata_scsi_error), the command timeout hook and the user scan hook.
 */
static struct scsi_transport_template ata_scsi_transport_template = {
    .eh_strategy_handler    = ata_scsi_error,
    .eh_timed_out        = ata_scsi_timed_out,
    .user_scan        = ata_scsi_user_scan,
};

该结构在ata_scsi_add_hosts函数中被赋给shost->transportt,上一篇文章对此有提及,可以参考该系列的上一篇文章。

下面我们看一下ata_scsi_error的由来:

linux/drivers/ata/libata-eh.c
/**
 *    ata_scsi_error - SCSI layer error handler callback
 *    @host: SCSI host on which error occurred
 *
 *    Handles SCSI-layer-thrown error events.  For ports that provide
 *    ->error_handler the function sorts out timed-out commands, runs
 *    the port's error handler (repeating while new exceptions keep
 *    arriving, up to ATA_EH_MAX_TRIES) and finally wakes up anyone
 *    sleeping on the port's eh_wait_q.  Ports without ->error_handler
 *    fall back to ->eng_timeout.
 *
 *    LOCKING:
 *    Inherited from SCSI layer (none, can sleep)
 *
 *    RETURNS:
 *    Nothing.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
    struct ata_port *ap = ata_shost_to_port(host);
    int i;
    unsigned long flags;

    DPRINTK("ENTER\n");

    /* synchronize with port task */
    ata_port_flush_task(ap);

    /* synchronize with host lock and sort out timeouts */

    /* For new EH, all qcs are finished in one of three ways -
     * normal completion, error completion, and SCSI timeout.
     * Both completions can race against SCSI timeout.  When normal
     * completion wins, the qc never reaches EH.  When error
     * completion wins, the qc has ATA_QCFLAG_FAILED set.
     *
     * When SCSI timeout wins, things are a bit more complex.
     * Normal or error completion can occur after the timeout but
     * before this point.  In such cases, both types of
     * completions are honored.  A scmd is determined to have
     * timed out iff its associated qc is active and not failed.
     */
    if (ap->ops->error_handler) {
        struct scsi_cmnd *scmd, *tmp;
        int nr_timedout = 0;

        spin_lock_irqsave(ap->lock, flags);

        list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
            struct ata_queued_cmd *qc;

            /* look for an active qc that carries this scmd */
            for (i = 0; i < ATA_MAX_QUEUE; i++) {
                qc = __ata_qc_from_tag(ap, i);
                if (qc->flags & ATA_QCFLAG_ACTIVE &&
                    qc->scsicmd == scmd)
                    break;
            }

            if (i < ATA_MAX_QUEUE) {
                /* the scmd has an associated qc */
                if (!(qc->flags & ATA_QCFLAG_FAILED)) {
                    /* which hasn't failed yet, timeout */
                    qc->err_mask |= AC_ERR_TIMEOUT;
                    qc->flags |= ATA_QCFLAG_FAILED;
                    nr_timedout++;
                }
            } else {
                /* Normal completion occurred after
                 * SCSI timeout but before this point.
                 * Successfully complete it.
                 */
                scmd->retries = scmd->allowed;
                scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
            }
        }

        /* If we have timed out qcs.  They belong to EH from
         * this point but the state of the controller is
         * unknown.  Freeze the port to make sure the IRQ
         * handler doesn't diddle with those qcs.  This must
         * be done atomically w.r.t. setting QCFLAG_FAILED.
         */
        if (nr_timedout)
            __ata_port_freeze(ap);

        spin_unlock_irqrestore(ap->lock, flags);

        /* initialize eh_tries */
        ap->eh_tries = ATA_EH_MAX_TRIES;
    } else
        spin_unlock_wait(ap->lock);

 repeat:
    /* invoke error handler */
    if (ap->ops->error_handler) {
        struct ata_link *link;

        /* kill fast drain timer */
        del_timer_sync(&ap->fastdrain_timer);

        /* process port resume request */
        ata_eh_handle_port_resume(ap);

        /* fetch & clear EH info */
        spin_lock_irqsave(ap->lock, flags);

        /* move each link's pending eh_info into its eh_context */
        __ata_port_for_each_link(link, ap) {
            memset(&link->eh_context, 0, sizeof(link->eh_context));
            link->eh_context.i = link->eh_info;
            memset(&link->eh_info, 0, sizeof(link->eh_info));
        }

        ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
        ap->pflags &= ~ATA_PFLAG_EH_PENDING;
        ap->excl_link = NULL;    /* don't maintain exclusion over EH */

        spin_unlock_irqrestore(ap->lock, flags);

        /* invoke EH, skip if unloading or suspended */
        if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
            ap->ops->error_handler(ap);

        else
            ata_eh_finish(ap);

        /* process port suspend request */
        ata_eh_handle_port_suspend(ap);

        /* Exception might have happened after ->error_handler
         * recovered the port but before this point.  Repeat
         * EH in such case.
         */
        spin_lock_irqsave(ap->lock, flags);

        if (ap->pflags & ATA_PFLAG_EH_PENDING) {
            if (--ap->eh_tries) {
                spin_unlock_irqrestore(ap->lock, flags);
                goto repeat;
            }
            ata_port_printk(ap, KERN_ERR, "EH pending after %d "
                    "tries, giving up\n", ATA_EH_MAX_TRIES);
            ap->pflags &= ~ATA_PFLAG_EH_PENDING;
        }

        /* this run is complete, make sure EH info is clear */
        __ata_port_for_each_link(link, ap)
            memset(&link->eh_info, 0, sizeof(link->eh_info));

        /* Clear host_eh_scheduled while holding ap->lock such
         * that if exception occurs after this point but
         * before EH completion, SCSI midlayer will
         * re-initiate EH.
         */
        host->host_eh_scheduled = 0;

        spin_unlock_irqrestore(ap->lock, flags);
    } else {
        /* port without ->error_handler: use the ->eng_timeout hook */
        WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
        ap->ops->eng_timeout(ap);
    }

    /* finish or retry handled scmd's and clean up */
    WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

    scsi_eh_flush_done_q(&ap->eh_done_q);

    /* clean up */
    spin_lock_irqsave(ap->lock, flags);

    /* a LOADING port just drops the flag; otherwise a requested
     * SCSI hotplug is handed to the hotplug work item
     */
    if (ap->pflags & ATA_PFLAG_LOADING)
        ap->pflags &= ~ATA_PFLAG_LOADING;
    else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
        queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

    if (ap->pflags & ATA_PFLAG_RECOVERED)
        ata_port_printk(ap, KERN_INFO, "EH complete\n");

    ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

    /* tell wait_eh that we're done */
    ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
    wake_up_all(&ap->eh_wait_q);

    spin_unlock_irqrestore(ap->lock, flags);

    DPRINTK("EXIT\n");
}
注意,这个函数在末尾通过wake_up_all(&ap->eh_wait_q)唤醒了之前在ata_port_wait_eh中等待的初始化主线程;这个函数执行完后,该内核线程就完成了一个周期,等待下一次被唤醒。下面我们继续分析这个函数是如何(通过ap->ops->error_handler)开始扫描每一个sata口的。

linux/drivers/ata/sata_fsl.c

/*
 * Port description for the FSL SATA controller: host and link flags,
 * the supported PIO/UDMA transfer-mode masks and the port operations
 * table.
 * NOTE(review): sata_fsl_ops is referenced here but only defined further
 * down in this listing -- presumably the excerpts are not in file order;
 * confirm against sata_fsl.c.
 */
static const struct ata_port_info sata_fsl_port_info[] = {
    {
     .flags = SATA_FSL_HOST_FLAGS,
     .link_flags = SATA_FSL_HOST_LFLAGS,
     .pio_mask = 0x1f,    /* pio 0-4 */
     .udma_mask = 0x7f,    /* udma 0-6 */
     .port_ops = &sata_fsl_ops,
     },
};

/*
 * Port operations of the FSL SATA driver, i.e. the callbacks libata
 * invokes on a port through ap->ops.
 */
static const struct ata_port_operations sata_fsl_ops = {
    /* the same routine serves both status reads */
    .check_status = sata_fsl_check_status,
    .check_altstatus = sata_fsl_check_status,
    .dev_select = ata_noop_dev_select,

    .tf_read = sata_fsl_tf_read,

    /* command preparation / issue and IRQ acknowledgement */
    .qc_prep = sata_fsl_qc_prep,
    .qc_issue = sata_fsl_qc_issue,
    .irq_clear = sata_fsl_irq_clear,

    /* SCR register access */
    .scr_read = sata_fsl_scr_read,
    .scr_write = sata_fsl_scr_write,

    /* error handling; ata_scsi_error() calls ->error_handler */
    .freeze = sata_fsl_freeze,
    .thaw = sata_fsl_thaw,
    .error_handler = sata_fsl_error_handler,
    .post_internal_cmd = sata_fsl_post_internal_cmd,

    .port_start = sata_fsl_port_start,
    .port_stop = sata_fsl_port_stop,

    /* port multiplier attach/detach hooks */
    .pmp_attach = sata_fsl_pmp_attach,
    .pmp_detach = sata_fsl_pmp_detach,
};

/*
 * sata_fsl_error_handler - ->error_handler hook of the FSL SATA driver
 * @ap: port to run error handling on
 *
 * Delegates to sata_pmp_do_eh(), supplying the reset methods for the
 * host link first and the PMP link reset methods second.
 */
static void sata_fsl_error_handler(struct ata_port *ap)
{
    DPRINTK("in xx_error_handler\n");

    /* perform recovery */
    sata_pmp_do_eh(ap,
               /* host link: prereset, softreset, hardreset, postreset */
               ata_std_prereset,
               sata_fsl_softreset,
               sata_std_hardreset,
               ata_std_postreset,
               /* PMP links: prereset, softreset, hardreset, postreset */
               sata_pmp_std_prereset,
               sata_fsl_pmp_softreset,
               sata_pmp_std_hardreset,
               sata_pmp_std_postreset);
}
这个函数比较简单,只是在驱动中对sata_pmp_do_eh做了一层封装。因为mpc8315 CPU的SATA接口支持PM(Port Multiplier,端口倍增器)功能,所以这里使用pmp模块的函数处理。

linux/drivers/ata/libata-pmp.c

/**
 *    sata_pmp_do_eh - do standard error handling for PMP-enabled host
 *    @ap: host port to handle error for
 *    @prereset: prereset method (can be NULL)
 *    @softreset: softreset method
 *    @hardreset: hardreset method
 *    @postreset: postreset method (can be NULL)
 *    @pmp_prereset: PMP prereset method (can be NULL)
 *    @pmp_softreset: PMP softreset method (can be NULL)
 *    @pmp_hardreset: PMP hardreset method (can be NULL)
 *    @pmp_postreset: PMP postreset method (can be NULL)
 *
 *    Runs the standard error handling sequence for PMP-enabled host
 *    @ap in four steps: autopsy, report, recover, finish.  The result
 *    of the recovery step is not examined.
 *
 *    LOCKING:
 *    Kernel thread context (may sleep).
 */
void sata_pmp_do_eh(struct ata_port *ap,
        ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
        ata_reset_fn_t hardreset, ata_postreset_fn_t postreset,
        ata_prereset_fn_t pmp_prereset, ata_reset_fn_t pmp_softreset,
        ata_reset_fn_t pmp_hardreset, ata_postreset_fn_t pmp_postreset)
{
    DPRINTK("ENTER\n");

    /* step 1 and 2: analyze what went wrong and report it */
    ata_eh_autopsy(ap);
    ata_eh_report(ap);

    /* step 3: recover host link and PMP links */
    sata_pmp_eh_recover(ap,
                prereset, softreset, hardreset, postreset,
                pmp_prereset, pmp_softreset,
                pmp_hardreset, pmp_postreset);

    /* step 4: wrap up this EH run */
    ata_eh_finish(ap);

    DPRINTK("EXIT\n");
}
到这里往下的部分已经很简单了,读者可以自己查找代码进一步的阅读,我们主要分析函数sata_pmp_eh_recover.


/**
 *    sata_pmp_eh_recover - recover PMP-enabled port
 *    @ap: ATA port to recover
 *    @prereset: prereset method (can be NULL)
 *    @softreset: softreset method
 *    @hardreset: hardreset method
 *    @postreset: postreset method (can be NULL)
 *    @pmp_prereset: PMP prereset method (can be NULL)
 *    @pmp_softreset: PMP softreset method (can be NULL)
 *    @pmp_hardreset: PMP hardreset method (can be NULL)
 *    @pmp_postreset: PMP postreset method (can be NULL)
 *
 *    Drive EH recovery operation for PMP enabled port @ap.  This
 *    function recovers host and PMP ports with proper retrials and
 *    fallbacks.  Actual recovery operations are performed using
 *    ata_eh_recover() and sata_pmp_eh_recover_pmp().
 *
 *    When no PMP links are attached (ap->nr_pmp_links == 0) the port
 *    is recovered with plain ata_eh_recover() and the function returns
 *    unless the device on the host link turns out to be a PMP.
 *
 *    LOCKING:
 *    Kernel thread context (may sleep).
 *
 *    RETURNS:
 *    0 on success, -errno on failure.
 */
static int sata_pmp_eh_recover(struct ata_port *ap,
        ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
        ata_reset_fn_t hardreset, ata_postreset_fn_t postreset,
        ata_prereset_fn_t pmp_prereset, ata_reset_fn_t pmp_softreset,
        ata_reset_fn_t pmp_hardreset, ata_postreset_fn_t pmp_postreset)
{
    int pmp_tries, link_tries[SATA_PMP_MAX_PORTS];
    struct ata_link *pmp_link = &ap->link;
    struct ata_device *pmp_dev = pmp_link->device;
    struct ata_eh_context *pmp_ehc = &pmp_link->eh_context;
    struct ata_link *link;
    struct ata_device *dev;
    unsigned int err_mask;
    u32 gscr_error, sntf;
    int cnt, rc;

    /* set up the retry budgets: one for the PMP, one per link */
    pmp_tries = ATA_EH_PMP_TRIES;
    ata_port_for_each_link(link, ap)
        link_tries[link->pmp] = ATA_EH_PMP_LINK_TRIES;

 retry:
    /* PMP attached? */
    if (!ap->nr_pmp_links) {
        rc = ata_eh_recover(ap, prereset, softreset, hardreset,
                    postreset, NULL);
        if (rc) {
            /* recovery failed: disable every device on the host link */
            ata_link_for_each_dev(dev, &ap->link)
                ata_dev_disable(dev);
            return rc;
        }

        /* no PMP behind the port: plain recovery is all there is */
        if (pmp_dev->class != ATA_DEV_PMP)
            return 0;

        /* new PMP online */
        ata_port_for_each_link(link, ap)
            link_tries[link->pmp] = ATA_EH_PMP_LINK_TRIES;

        /* fall through */
    }

    /* recover pmp */
    rc = sata_pmp_eh_recover_pmp(ap, prereset, softreset, hardreset,
                     postreset);
    if (rc)
        goto pmp_fail;

    /* handle disabled links */
    rc = sata_pmp_eh_handle_disabled_links(ap);
    if (rc)
        goto pmp_fail;

    /* recover links */
    /* on failure, the failed link is reported back through &link */
    rc = ata_eh_recover(ap, pmp_prereset, pmp_softreset, pmp_hardreset,
                pmp_postreset, &link);

    if (rc)
        goto link_fail;

    /* Connection status might have changed while resetting other
     * links, check SATA_PMP_GSCR_ERROR before returning.
     */

    /* clear SNotification */
    rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
    if (rc == 0)
        sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

    /* enable notification */
    if (pmp_dev->flags & ATA_DFLAG_AN) {
        pmp_dev->gscr[SATA_PMP_GSCR_FEAT_EN] |= SATA_PMP_FEAT_NOTIFY;

        err_mask = sata_pmp_write(pmp_dev->link, SATA_PMP_GSCR_FEAT_EN,
                      pmp_dev->gscr[SATA_PMP_GSCR_FEAT_EN]);
        if (err_mask) {
            ata_dev_printk(pmp_dev, KERN_ERR, "failed to write "
                       "PMP_FEAT_EN (Emask=0x%x)\n", err_mask);
            rc = -EIO;
            goto pmp_fail;
        }
    }

    /* check GSCR_ERROR */
    err_mask = sata_pmp_read(pmp_link, SATA_PMP_GSCR_ERROR, &gscr_error);
    if (err_mask) {
        ata_dev_printk(pmp_dev, KERN_ERR, "failed to read "
                   "PMP_GSCR_ERROR (Emask=0x%x)\n", err_mask);
        rc = -EIO;
        goto pmp_fail;
    }

    /* count the links whose bit is set in GSCR_ERROR and that still
     * have retries left; those trigger another recovery round
     */
    cnt = 0;
    ata_port_for_each_link(link, ap) {
        if (!(gscr_error & (1 << link->pmp)))
            continue;

        if (sata_pmp_handle_link_fail(link, link_tries)) {
            ata_ehi_hotplugged(&link->eh_context.i);
            cnt++;
        } else {
            ata_link_printk(link, KERN_WARNING,
                "PHY status changed but maxed out on retries, "
                "giving up\n");
            ata_link_printk(link, KERN_WARNING,
                "Manully issue scan to resume this link\n");
        }
    }

    if (cnt) {
        ata_port_printk(ap, KERN_INFO, "PMP SError.N set for some "
                "ports, repeating recovery\n");
        goto retry;
    }

    return 0;

 link_fail:
    /* a fan-out link failed; retry with a hardreset of the PMP link
     * as long as sata_pmp_handle_link_fail() allows it
     */
    if (sata_pmp_handle_link_fail(link, link_tries)) {
        pmp_ehc->i.action |= ATA_EH_HARDRESET;
        goto retry;
    }

    /* fall through */
 pmp_fail:
    /* Control always ends up here after detaching PMP.  Shut up
     * and return if we're unloading.
     */
    if (ap->pflags & ATA_PFLAG_UNLOADING)
        return rc;

    /* no PMP links left: start over on the non-PMP path */
    if (!ap->nr_pmp_links)
        goto retry;

    if (--pmp_tries) {
        ata_port_printk(ap, KERN_WARNING,
                "failed to recover PMP, retrying in 5 secs\n");
        pmp_ehc->i.action |= ATA_EH_HARDRESET;
        ssleep(5);
        goto retry;
    }

    /* out of retries: detach and disable the PMP device */
    ata_port_printk(ap, KERN_ERR,
            "failed to recover PMP after %d tries, giving up\n",
            ATA_EH_PMP_TRIES);
    sata_pmp_detach(pmp_dev);
    ata_dev_disable(pmp_dev);

    return rc;
}

linux/drivers/ata/libata-eh.c
/**
 *    ata_eh_recover - recover host port after error
 *    @ap: host port to recover
 *    @prereset: prereset method (can be NULL)
 *    @softreset: softreset method (can be NULL)
 *    @hardreset: hardreset method (can be NULL)
 *    @postreset: postreset method (can be NULL)
 *    @r_failed_link: out parameter for failed link (can be NULL; only
 *                    written when the return value is non-zero)
 *
 *    This is the alpha and omega, eum and yang, heart and soul of
 *    libata exception handling.  On entry, actions required to
 *    recover each link and hotplug requests are recorded in the
 *    link's eh_context.  This function executes all the operations
 *    with appropriate retrials and fallbacks to resurrect failed
 *    devices, detach goners and greet newcomers.
 *
 *    Outline of one recovery pass (restarted from the retry label
 *    whenever a device failed to recover):
 *      1. give up immediately if the port is being unloaded;
 *      2. decide per link whether a reset is needed and clear the
 *         recorded device classes;
 *      3. reset every link that asked for it (the port is frozen
 *         around the resets unless PMP links are present);
 *      4. revalidate/attach devices, configure the transfer mode and
 *         power management for each link.
 *
 *    LOCKING:
 *    Kernel thread context (may sleep).
 *
 *    RETURNS:
 *    0 on success, -errno on failure.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
           ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
           ata_postreset_fn_t postreset,
           struct ata_link **r_failed_link)
{
    struct ata_link *link;
    struct ata_device *dev;
    int nr_failed_devs, nr_disabled_devs;
    int reset, rc;
    unsigned long flags;

    DPRINTK("ENTER\n");

    /* prep for recovery */
    ata_port_for_each_link(link, ap) {
        struct ata_eh_context *ehc = &link->eh_context;

        /* re-enable link? */
        if (ehc->i.action & ATA_EH_ENABLE_LINK) {
            ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
            /* link->flags is modified under the port lock */
            spin_lock_irqsave(ap->lock, flags);
            link->flags &= ~ATA_LFLAG_DISABLED;
            spin_unlock_irqrestore(ap->lock, flags);
            ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
        }

        ata_link_for_each_dev(dev, link) {
            /* per-device retry budget: a single try for NO_RETRY links */
            if (link->flags & ATA_LFLAG_NO_RETRY)
                ehc->tries[dev->devno] = 1;
            else
                ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

            /* collect port action mask recorded in dev actions */
            ehc->i.action |= ehc->i.dev_action[dev->devno] &
                     ~ATA_EH_PERDEV_MASK;
            ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

            /* process hotplug request */
            if (dev->flags & ATA_DFLAG_DETACH)
                ata_eh_detach_dev(dev);

            /* a disabled device whose probe was requested and not yet
             * performed: reinitialize it, remember that it was probed
             * and request a softreset for the link
             */
            if (!ata_dev_enabled(dev) &&
                ((ehc->i.probe_mask & (1 << dev->devno)) &&
                 !(ehc->did_probe_mask & (1 << dev->devno)))) {
                ata_eh_detach_dev(dev);
                ata_dev_init(dev);
                ehc->did_probe_mask |= (1 << dev->devno);
                ehc->i.action |= ATA_EH_SOFTRESET;
            }
        }
    }

 retry:
    /* every pass starts with fresh result and counters */
    rc = 0;
    nr_failed_devs = 0;
    nr_disabled_devs = 0;
    reset = 0;

    /* if UNLOADING, finish immediately */
    if (ap->pflags & ATA_PFLAG_UNLOADING)
        goto out;

    /* prep for EH */
    ata_port_for_each_link(link, ap) {
        struct ata_eh_context *ehc = &link->eh_context;

        /* skip EH if possible. */
        if (ata_eh_skip_recovery(link))
            ehc->i.action = 0;

        /* do we need to reset? */
        if (ehc->i.action & ATA_EH_RESET_MASK)
            reset = 1;

        /* classes are re-determined on every pass */
        ata_link_for_each_dev(dev, link)
            ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
    }

    /* reset */
    if (reset) {
        /* if PMP is attached, this function only deals with
         * downstream links, port should stay thawed.
         */
        if (!ap->nr_pmp_links)
            ata_eh_freeze_port(ap);

        ata_port_for_each_link(link, ap) {
            struct ata_eh_context *ehc = &link->eh_context;

            /* only links that requested a reset are reset */
            if (!(ehc->i.action & ATA_EH_RESET_MASK))
                continue;

            rc = ata_eh_reset(link, ata_link_nr_vacant(link),
                      prereset, softreset, hardreset,
                      postreset);

            /* a failed reset aborts the whole recovery; link still
             * points at the failing link when out is reached
             */
            if (rc) {
                ata_link_printk(link, KERN_ERR,
                        "reset failed, giving up\n");
                goto out;
            }
        }

        if (!ap->nr_pmp_links)
            ata_eh_thaw_port(ap);
    }

    /* the rest */
    ata_port_for_each_link(link, ap) {
        struct ata_eh_context *ehc = &link->eh_context;

        /* revalidate existing devices and attach new ones */
        rc = ata_eh_revalidate_and_attach(link, &dev);
        if (rc)
            goto dev_fail;

        /* if PMP got attached, return, pmp EH will take care of it */
        if (link->device->class == ATA_DEV_PMP) {
            ehc->i.action = 0;
            return 0;
        }

        /* configure transfer mode if necessary */
        if (ehc->i.flags & ATA_EHI_SETMODE) {
            rc = ata_set_mode(link, &dev);
            if (rc)
                goto dev_fail;
            ehc->i.flags &= ~ATA_EHI_SETMODE;
        }

        /* NOTE(review): this tests i.action against an ATA_EHI_*
         * constant, while the other ATA_EHI_* bits in this file are
         * tested against i.flags - confirm this is intended.
         */
        if (ehc->i.action & ATA_EHI_LPM)
            ata_link_for_each_dev(dev, link)
                ata_dev_enable_pm(dev, ap->pm_policy);

        /* this link is okay now */
        ehc->i.flags = 0;
        continue;

        /* reached only via goto from the failure checks above; dev is
         * the device handed back through the &dev out arguments
         */
dev_fail:
        nr_failed_devs++;
        if (ata_eh_handle_dev_fail(dev, rc))
            nr_disabled_devs++;

        if (ap->pflags & ATA_PFLAG_FROZEN) {
            /* PMP reset requires working host port.
             * Can't retry if it's frozen.
             */
            if (ap->nr_pmp_links)
                goto out;
            break;
        }
    }

    if (nr_failed_devs) {
        if (nr_failed_devs != nr_disabled_devs) {
            /* some failed devices were not disabled: wait longer */
            ata_port_printk(ap, KERN_WARNING, "failed to recover "
                    "some devices, retrying in 5 secs\n");
            ssleep(5);
        } else {
            /* no device left to recover, repeat fast */
            msleep(500);
        }

        goto retry;
    }

 out:
    /* report the link being processed when the failure occurred */
    if (rc && r_failed_link)
        *r_failed_link = link;

    DPRINTK("EXIT, rc=%d\n", rc);
    return rc;
}

/*
 * ata_eh_reset - reset a link, retrying with per-attempt timeouts
 * @link: link to reset
 * @classify: non-zero if the caller needs device classification
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Marks the port as RESETTING, chooses between soft and hard reset,
 * lets @prereset veto or adjust that choice, then performs the reset.
 * A failed attempt is retried until ata_eh_reset_timeouts[] is
 * exhausted or the reset returns -ERESTART.  On success the reset
 * action is marked done and ATA_EH_REVALIDATE is scheduled.
 *
 * Returns 0 on success (including the cases where prereset reports
 * -ENOENT or clears the reset action), otherwise the error code of
 * the failing step.
 */
int ata_eh_reset(struct ata_link *link, int classify,
         ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
         ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
    /* one attempt is allowed per entry of the timeout table */
    const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts);
    struct ata_port *ap = link->ap;
    struct ata_eh_context *ehc = &link->eh_context;
    unsigned int *classes = ehc->classes;
    /* local snapshot; changes made to lflags below do not reach link->flags */
    unsigned int lflags = link->flags;
    int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
    /* number of attempts started so far */
    int try = 0;
    struct ata_device *dev;
    unsigned long deadline, now;
    unsigned int tmp_action;
    ata_reset_fn_t reset;
    unsigned long flags;
    u32 sstatus;
    int rc;

    /* about to reset */
    spin_lock_irqsave(ap->lock, flags);
    ap->pflags |= ATA_PFLAG_RESETTING;
    spin_unlock_irqrestore(ap->lock, flags);

    ata_eh_about_to_do(link, NULL, ehc->i.action & ATA_EH_RESET_MASK);

    ata_link_for_each_dev(dev, link) {
        /* If we issue an SRST then an ATA drive (not ATAPI)
         * may change configuration and be in PIO0 timing. If
         * we do a hard reset (or are coming from power on)
         * this is true for ATA or ATAPI. Until we've set a
         * suitable controller mode we should not touch the
         * bus as we may be talking too fast.
         */
        dev->pio_mode = XFER_PIO_0;

        /* If the controller has a pio mode setup function
         * then use it to set the chipset to rights. Don't
         * touch the DMA setup as that will be dealt with when
         * configuring devices.
         */
        if (ap->ops->set_piomode)
            ap->ops->set_piomode(ap, dev);
    }

    /* Determine which reset to use and record in ehc->i.action.
     * prereset() may examine and modify it.
     *
     * Softreset is chosen when it exists and either there is no
     * hardreset at all, or none of the following forces a hardreset:
     * the NO_SRST link flag, a pending speed change, or an explicit
     * ATA_EH_HARDRESET request.
     */
    if (softreset && (!hardreset || (!(lflags & ATA_LFLAG_NO_SRST) &&
                     !sata_set_spd_needed(link) &&
                     !(ehc->i.action & ATA_EH_HARDRESET))))
        tmp_action = ATA_EH_SOFTRESET;
    else
        tmp_action = ATA_EH_HARDRESET;

    /* replace whatever reset bits were requested with the chosen one */
    ehc->i.action = (ehc->i.action & ~ATA_EH_RESET_MASK) | tmp_action;

    if (prereset) {
        rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT);
        if (rc) {
            if (rc == -ENOENT) {
                /* not treated as an error: drop the reset request,
                 * mark every device as absent and return success
                 */
                ata_link_printk(link, KERN_DEBUG,
                        "port disabled. ignoring.\n");
                ehc->i.action &= ~ATA_EH_RESET_MASK;

                ata_link_for_each_dev(dev, link)
                    classes[dev->devno] = ATA_DEV_NONE;

                rc = 0;
            } else
                ata_link_printk(link, KERN_ERR,
                    "prereset failed (errno=%d)\n", rc);
            goto out;
        }
    }

    /* prereset() might have modified ehc->i.action */
    if (ehc->i.action & ATA_EH_HARDRESET)
        reset = hardreset;
    else if (ehc->i.action & ATA_EH_SOFTRESET)
        reset = softreset;
    else {
        /* prereset told us not to reset, bang classes and return */
        ata_link_for_each_dev(dev, link)
            classes[dev->devno] = ATA_DEV_NONE;
        rc = 0;
        goto out;
    }

    /* did prereset() screw up?  if so, fix up to avoid oopsing */
    if (!reset) {
        if (softreset)
            reset = softreset;
        else
            reset = hardreset;
    }

 retry:
    /* each attempt gets its own timeout from the table */
    deadline = jiffies + ata_eh_reset_timeouts[try++];

    /* shut up during boot probing */
    if (verbose)
        ata_link_printk(link, KERN_INFO, "%s resetting link\n",
                reset == softreset ? "soft" : "hard");

    /* mark that this EH session started with reset */
    if (reset == hardreset)
        ehc->i.flags |= ATA_EHI_DID_HARDRESET;
    else
        ehc->i.flags |= ATA_EHI_DID_SOFTRESET;

    rc = ata_do_reset(link, reset, classes, deadline);

    if (reset == hardreset &&
        ata_eh_followup_srst_needed(link, rc, classify, classes)) {
        /* okay, let's do follow-up softreset */
        reset = softreset;

        if (!reset) {
            ata_link_printk(link, KERN_ERR,
                    "follow-up softreset required "
                    "but no softreset avaliable\n");
            rc = -EINVAL;
            goto fail;
        }

        /* the follow-up softreset shares the deadline of this attempt */
        ata_eh_about_to_do(link, NULL, ATA_EH_RESET_MASK);
        rc = ata_do_reset(link, reset, classes, deadline);
    }


    /* -EAGAIN can happen if we skipped followup SRST */
    if (rc && rc != -EAGAIN)
        goto fail;

    /* was classification successful? */
    if (classify && classes[0] == ATA_DEV_UNKNOWN &&
        !(lflags & ATA_LFLAG_ASSUME_CLASS)) {
        /* attempts remain: treat as a failure and retry */
        if (try < max_tries) {
            ata_link_printk(link, KERN_WARNING,
                    "classification failed\n");
            rc = -EINVAL;
            goto fail;
        }

        /* out of attempts: fall back to assuming ATA (local flag only) */
        ata_link_printk(link, KERN_WARNING,
                "classfication failed, assuming ATA\n");
        lflags |= ATA_LFLAG_ASSUME_ATA;
    }

    ata_link_for_each_dev(dev, link) {
        /* After the reset, the device state is PIO 0 and the
         * controller state is undefined.  Reset also wakes up
         * drives from sleeping mode.
         */
        dev->pio_mode = XFER_PIO_0;
        dev->flags &= ~ATA_DFLAG_SLEEPING;

        /* class overrides are not applied to an offline link */
        if (ata_link_offline(link))
            continue;

        /* apply class override */
        if (lflags & ATA_LFLAG_ASSUME_ATA)
            classes[dev->devno] = ATA_DEV_ATA;
        else if (lflags & ATA_LFLAG_ASSUME_SEMB)
            classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */
    }

    /* record current link speed */
    if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
        link->sata_spd = (sstatus >> 4) & 0xf;

    if (postreset)
        postreset(link, classes);

    /* reset successful, schedule revalidation */
    ata_eh_done(link, NULL, ehc->i.action & ATA_EH_RESET_MASK);
    ehc->i.action |= ATA_EH_REVALIDATE;

    rc = 0;
 out:
    /* common exit for both success and failure */
    /* clear hotplug flag */
    ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

    spin_lock_irqsave(ap->lock, flags);
    ap->pflags &= ~ATA_PFLAG_RESETTING;
    spin_unlock_irqrestore(ap->lock, flags);

    return rc;

 fail:
    /* -ERESTART and an exhausted attempt budget are final */
    if (rc == -ERESTART || try >= max_tries)
        goto out;

    /* wait out the remainder of this attempt's timeout before retrying */
    now = jiffies;
    if (time_before(now, deadline)) {
        unsigned long delta = deadline - now;

        ata_link_printk(link, KERN_WARNING, "reset failed "
                "(errno=%d), retrying in %u secs\n",
                rc, (jiffies_to_msecs(delta) + 999) / 1000);

        /* keep sleeping until the whole delay has elapsed */
        while (delta)
            delta = schedule_timeout_uninterruptible(delta);
    }

    /* on -EPIPE, or when the next attempt is the last one, call
     * sata_down_spd_limit() (presumably lowers the link speed limit -
     * confirm against its definition)
     */
    if (rc == -EPIPE || try == max_tries - 1)
        sata_down_spd_limit(link);
    /* retries use hardreset whenever one is available */
    if (hardreset)
        reset = hardreset;
    goto retry;
}

这个函数比较复杂,读者可以自己分析下, 由于篇幅所限就不一一分析下去了。


因为本文讨论的大部分都是scsi模块的内容,所以将名字更改,但是这是之前两篇文章的继续, libata是scsi的子系统。

scsi部分主要是重要结构的建立,起到承上启下的作用。

linux/drivers/scsi/sd.c
这个模块初始化早于sata驱动的初始化,我们看一下该模块的初始化函数。

/**
 *    init_sd - module/built-in entry point of the sd driver
 *
 *    Tries to register every sd block major, then the sd_disk class
 *    and finally the driver itself with the scsi mid-level.
 *
 *    Returns 0 on success, -ENODEV when not a single block major
 *    could be registered, or the error reported by class or driver
 *    registration (in which case everything registered so far is
 *    undone).
 **/
static int __init init_sd(void)
{
    int registered = 0;
    int idx;
    int rc;

    SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));

    /* count how many of the majors were actually obtained */
    for (idx = 0; idx < SD_MAJORS; idx++) {
        if (register_blkdev(sd_major(idx), "sd") != 0)
            continue;
        registered++;
    }

    if (registered == 0)
        return -ENODEV;

    rc = class_register(&sd_disk_class);
    if (rc == 0) {
        rc = scsi_register_driver(&sd_template.gendrv);
        if (rc == 0)
            return 0;

        /* driver registration failed: take the class down again */
        class_unregister(&sd_disk_class);
    }

    /* failure: release the block majors before reporting the error */
    for (idx = 0; idx < SD_MAJORS; idx++)
        unregister_blkdev(sd_major(idx), "sd");

    return rc;
}

/*
 * Driver descriptor of the sd module.  The embedded generic driver
 * (.gendrv) is what init_sd() hands to scsi_register_driver(); its
 * callbacks are all implemented by sd itself (sd_probe, sd_remove, ...).
 */
static struct scsi_driver sd_template = {
    .owner            = THIS_MODULE,
    /* generic driver part: name plus probe/remove/PM/shutdown callbacks */
    .gendrv = {
        .name        = "sd",
        .probe        = sd_probe,
        .remove        = sd_remove,
        .suspend    = sd_suspend,
        .resume        = sd_resume,
        .shutdown    = sd_shutdown,
    },
    /* scsi_driver-level hooks (presumably invoked by the scsi mid-level
     * on rescan and on command completion - confirm against callers) */
    .rescan            = sd_rescan,
    .done            = sd_done,
};

一般我们认为,系统在执行完scsi_register_driver后,会根据总线上的设备情况执行进一步的处理,如执行驱动提供的probe函数。但是因为这个模块早于sata驱动的初始化,此时系统中还没有其他的硬盘设备,所以这里并没有在总线上发现设备。直到sata初始化完并向scsi层注册设备后,才会手动执行这里的probe函数,进而完成sd模块的初始化,如分配设备号等。

下面我们继续分析之前两篇文章剩下的一点部分,引入今天我们要分析的内容。

之前我们看到(第一篇文章)系统在执行到ata_host_register会等待一个内核线程的执行,并在第二篇文章看到了内核线程的执行流程以及该线程结束后会唤醒等待的主初始化线程,现在我们从等待后面继续往下看。

ata_host_register函数剩下的部分比较简单,只有几行代码,我们主要看他调用的一个函数,如下:

linux/drivers/ata/libata-scsi.c

/*
 * ata_scsi_scan_host - create SCSI devices for the ATA devices of a port
 * @ap: port to scan
 * @sync: non-zero to retry synchronously (sleeping between passes)
 *        before falling back to the delayed hotplug work
 *
 * For every enabled ATA device that has no scsi_device yet,
 * __scsi_add_device() is called.  If some enabled device is still
 * without a scsi_device afterwards, the scan is repeated (when @sync
 * is set) and finally handed over to ap->hotplug_task on ata_aux_wq.
 * Does nothing for a disabled port.
 */
void ata_scsi_scan_host(struct ata_port *ap, int sync)
{
    /* extra passes allowed once no progress is being made */
    int tries = 5;
    /* first device found unattached by the previous pass */
    struct ata_device *last_failed_dev = NULL;
    struct ata_link *link;
    struct ata_device *dev;

    if (ap->flags & ATA_FLAG_DISABLED)
        return;

 repeat:
    ata_port_for_each_link(link, ap) {
        ata_link_for_each_dev(dev, link) {
            struct scsi_device *sdev;
            int channel = 0, id = 0;

            /* skip disabled devices and those already attached */
            if (!ata_dev_enabled(dev) || dev->sdev)
                continue;

            /* host link: address by device number (channel 0);
             * any other link: address by its pmp number (id 0)
             */
            if (ata_is_host_link(link))
                id = dev->devno;
            else
                channel = link->pmp;

            sdev = __scsi_add_device(ap->scsi_host, channel, id, 0,
                         NULL);

            /* on success remember the scsi_device and drop the
             * reference that came with the returned pointer
             */
            if (!IS_ERR(sdev)) {
                dev->sdev = sdev;
                scsi_device_put(sdev);
            }
        }
    }

    /* If we scanned while EH was in progress or allocation
     * failure occurred, scan would have failed silently.  Check
     * whether all devices are attached.
     */
    ata_port_for_each_link(link, ap) {
        ata_link_for_each_dev(dev, link) {
            if (ata_dev_enabled(dev) && !dev->sdev)
                goto exit_loop;
        }
    }
 exit_loop:
    /* presumably the iterator leaves link NULL once every link has
     * been visited, i.e. nothing is missing - confirm against the
     * ata_port_for_each_link() definition
     */
    if (!link)
        return;

    /* we're missing some SCSI devices */
    if (sync) {
        /* If caller requested synchrnous scan && we've made
         * any progress, sleep briefly and repeat.
         */
        if (dev != last_failed_dev) {
            msleep(100);
            last_failed_dev = dev;
            goto repeat;
        }

        /* We might be failing to detect boot device, give it
         * a few more chances.
         */
        if (--tries) {
            msleep(100);
            goto repeat;
        }

        ata_port_printk(ap, KERN_ERR, "WARNING: synchronous SCSI scan "
                "failed without making any progress,\n"
                "                  switching to async\n");
    }

    /* let the hotplug work retry later */
    queue_delayed_work(ata_aux_wq, &ap->hotplug_task,
               round_jiffies_relative(HZ));
}

linux/drivers/scsi/scsi_scan.c

/*
 * __scsi_add_device - probe one <channel, id, lun> address on a host
 * @shost: host to scan on
 * @channel: channel of the target
 * @id: id of the target
 * @lun: LUN to probe
 * @hostdata: passed through to scsi_probe_and_add_lun()
 *
 * Returns the scsi_device stored by scsi_probe_and_add_lun(), or
 * ERR_PTR(-ENODEV) when scanning is turned off ("none" scan type), is
 * not allowed on this host or stored no device, or ERR_PTR(-ENOMEM)
 * when the target cannot be allocated.
 */
struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
                      uint id, uint lun, void *hostdata)
{
    struct scsi_target *target;
    struct scsi_device *found;

    if (!strncmp(scsi_scan_type, "none", 4))
        return ERR_PTR(-ENODEV);

    target = scsi_alloc_target(&shost->shost_gendev, channel, id);
    if (target == NULL)
        return ERR_PTR(-ENOMEM);

    /* stays an error pointer unless the probe below stores a device */
    found = ERR_PTR(-ENODEV);

    /* the probe itself runs under the host's scan mutex */
    mutex_lock(&shost->scan_mutex);
    if (!shost->async_scan)
        scsi_complete_async_scans();
    if (scsi_host_scan_allowed(shost))
        scsi_probe_and_add_lun(target, lun, NULL, &found, 1, hostdata);
    mutex_unlock(&shost->scan_mutex);

    /* done with the target on our side */
    scsi_target_reap(target);
    put_device(&target->dev);

    return found;
}

/**
 * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it
 * @starget:    pointer to target device structure
 * @lun:    LUN of target device
 * @bflagsp:    store bflags here if not NULL
 * @sdevp:    store the scsi_device found or newly allocated here if not
 *        NULL
 * @rescan:    non-zero if this is not the first scan of the host; an
 *        already existing scsi_device is then returned as is
 * @hostdata:    passed to scsi_alloc_sdev() when a new scsi_device has to
 *        be allocated
 *
 * Description:
 *     Call scsi_probe_lun, if a LUN with an attached device is found,
 *     allocate and set it up by calling scsi_add_lun.
 *
 * Return:
 *     SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
 *     SCSI_SCAN_TARGET_PRESENT: target responded, but no device is
 *         attached at the LUN
 *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
 **/
static int scsi_probe_and_add_lun(struct scsi_target *starget,
                  uint lun, int *bflagsp,
                  struct scsi_device **sdevp, int rescan,
                  void *hostdata)
{
    struct scsi_device *sdev;
    unsigned char *result;
    /* result_len is the size of the INQUIRY result buffer */
    int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
    struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);

    /*
     * The rescan flag is used as an optimization, the first scan of a
     * host adapter calls into here with rescan == 0.
     */
    sdev = scsi_device_lookup_by_target(starget, lun);
    if (sdev) {
        /* device already known: hand it back without probing again */
        if (rescan || sdev->sdev_state != SDEV_CREATED) {
            SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
                "scsi scan: device exists on %s\n",
                sdev->sdev_gendev.bus_id));
            /* the lookup reference goes to the caller, or is dropped */
            if (sdevp)
                *sdevp = sdev;
            else
                scsi_device_put(sdev);

            if (bflagsp)
                *bflagsp = scsi_get_device_flags(sdev,
                                 sdev->vendor,
                                 sdev->model);
            return SCSI_SCAN_LUN_PRESENT;
        }
        /* NOTE(review): the lookup reference is dropped here while sdev
         * keeps being used below - presumably another reference keeps
         * it alive; confirm.
         */
        scsi_device_put(sdev);
    } else
        sdev = scsi_alloc_sdev(starget, lun, hostdata);
    if (!sdev)
        goto out;

    /* buffer for the INQUIRY data; __GFP_DMA only if the host asks for it */
    result = kmalloc(result_len, GFP_ATOMIC |
            ((shost->unchecked_isa_dma) ? __GFP_DMA : 0));
    if (!result)
        goto out_free_sdev;

    /* res is still SCSI_SCAN_NO_RESPONSE if the probe fails */
    if (scsi_probe_lun(sdev, result, result_len, &bflags))
        goto out_free_result;

    if (bflagsp)
        *bflagsp = bflags;
    /*
     * result contains valid SCSI INQUIRY data.
     */
    if (((result[0] >> 5) == 3) && !(bflags & BLIST_ATTACH_PQ3)) {
        /*
         * For a Peripheral qualifier 3 (011b), the SCSI
         * spec says: The device server is not capable of
         * supporting a physical device on this logical
         * unit.
         *
         * For disks, this implies that there is no
         * logical disk configured at sdev->lun, but there
         * is a target id responding.
         */
        /* NOTE(review): no ';' follows this macro invocation, so it
         * relies on the macro's expansion - confirm.
         */
        SCSI_LOG_SCAN_BUS(2, sdev_printk(KERN_INFO, sdev, "scsi scan:"
                   " peripheral qualifier of 3, device not"
                   " added\n"))
        if (lun == 0) {
            SCSI_LOG_SCAN_BUS(1, {
                unsigned char vend[9];
                unsigned char mod[17];

                sdev_printk(KERN_INFO, sdev,
                    "scsi scan: consider passing scsi_mod."
                    "dev_flags=%s:%s:0x240 or 0x1000240\n",
                    scsi_inq_str(vend, result, 8, 16),
                    scsi_inq_str(mod, result, 16, 32));
            });
        }

        res = SCSI_SCAN_TARGET_PRESENT;
        goto out_free_result;
    }

    /*
     * Some targets may set slight variations of PQ and PDT to signal
     * that no LUN is present, so don't add sdev in these cases.
     * Two specific examples are:
     * 1) NetApp targets: return PQ=1, PDT=0x1f
     * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
     *    in the UFI 1.0 spec (we cannot rely on reserved bits).
     *
     * References:
     * 1) SCSI SPC-3, pp. 145-146
     * PQ=1: "A peripheral device having the specified peripheral
     * device type is not connected to this logical unit. However, the
     * device server is capable of supporting the specified peripheral
     * device type on this logical unit."
     * PDT=0x1f: "Unknown or no device type"
     * 2) USB UFI 1.0, p. 20
     * PDT=00h Direct-access device (floppy)
     * PDT=1Fh none (no FDD connected to the requested logical unit)
     */
    if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) &&
         (result[0] & 0x1f) == 0x1f) {
        SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
                    "scsi scan: peripheral device type"
                    " of 31, no device added\n"));
        res = SCSI_SCAN_TARGET_PRESENT;
        goto out_free_result;
    }

    /* a usable LUN answered: fill in and register the scsi_device */
    res = scsi_add_lun(sdev, result, &bflags, shost->async_scan);
    if (res == SCSI_SCAN_LUN_PRESENT) {
        if (bflags & BLIST_KEY) {
            sdev->lockable = 0;
            scsi_unlock_floptical(sdev, result);
        }
    }


 out_free_result:
    kfree(result);
 out_free_sdev:
    if (res == SCSI_SCAN_LUN_PRESENT) {
        /* give the caller its own reference; if that cannot be
         * obtained the device is removed again and the scan is
         * reported as failed
         */
        if (sdevp) {
            if (scsi_device_get(sdev) == 0) {
                *sdevp = sdev;
            } else {
                __scsi_remove_device(sdev);
                res = SCSI_SCAN_NO_RESPONSE;
            }
        }
    } else
        /* nothing usable at this LUN: throw the scsi_device away */
        scsi_destroy_sdev(sdev);
 out:
    return res;
}

/**
 * scsi_add_lun - allocate and fully initialize a scsi_device
 * @sdev:    holds information to be stored in the new scsi_device
 * @inq_result:    holds the result of a previous INQUIRY to the LUN
 * @bflags:    black/white list flag (may be updated, see BLIST_NOREPORTLUN
 *        below)
 * @async:    1 if this device is being scanned asynchronously; the sysfs
 *        registration is then skipped here
 *
 * Description:
 *     Initialize the scsi_device @sdev.  Optionally set fields based
 *     on values in *@bflags.
 *
 * Return:
 *     SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
 *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
 **/
static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
        int *bflags, int async)
{
    /*
     * XXX do not save the inquiry, since it can change underneath us,
     * save just vendor/model/rev.
     *
     * Rather than save it and have an ioctl that retrieves the saved
     * value, have an ioctl that executes the same INQUIRY code used
     * in scsi_probe_lun, let user level programs doing INQUIRY
     * scanning run at their own risk, or supply a user level program
     * that can correctly scan.
     */

    /*
     * Copy at least 36 bytes of INQUIRY data, so that we don't
     * dereference unallocated memory when accessing the Vendor,
     * Product, and Revision strings.  Badly behaved devices may set
     * the INQUIRY Additional Length byte to a small value, indicating
     * these strings are invalid, but often they contain plausible data
     * nonetheless.  It doesn't matter if the device sent < 36 bytes
     * total, since scsi_probe_lun() initializes inq_result with 0s.
     */
    sdev->inquiry = kmemdup(inq_result,
                max_t(size_t, sdev->inquiry_len, 36),
                GFP_ATOMIC);
    if (sdev->inquiry == NULL)
        return SCSI_SCAN_NO_RESPONSE;

    /* vendor/model/rev point into the saved copy (offsets 8, 16, 32) */
    sdev->vendor = (char *) (sdev->inquiry + 8);
    sdev->model = (char *) (sdev->inquiry + 16);
    sdev->rev = (char *) (sdev->inquiry + 32);

    /* BLIST_ISROM overrides the type and removable bit of the INQUIRY data */
    if (*bflags & BLIST_ISROM) {
        sdev->type = TYPE_ROM;
        sdev->removable = 1;
    } else {
        sdev->type = (inq_result[0] & 0x1f);
        sdev->removable = (inq_result[1] & 0x80) >> 7;
    }

    /* derive writeability from the device type; unknown types are only
     * logged and leave sdev->writeable untouched
     */
    switch (sdev->type) {
    case TYPE_RBC:
    case TYPE_TAPE:
    case TYPE_DISK:
    case TYPE_PRINTER:
    case TYPE_MOD:
    case TYPE_PROCESSOR:
    case TYPE_SCANNER:
    case TYPE_MEDIUM_CHANGER:
    case TYPE_ENCLOSURE:
    case TYPE_COMM:
    case TYPE_RAID:
        sdev->writeable = 1;
        break;
    case TYPE_ROM:
    case TYPE_WORM:
        sdev->writeable = 0;
        break;
    default:
        printk(KERN_INFO "scsi: unknown device type %d\n", sdev->type);
    }

    if (sdev->type == TYPE_RBC || sdev->type == TYPE_ROM) {
        /* RBC and MMC devices can return SCSI-3 compliance and yet
         * still not support REPORT LUNS, so make them act as
         * BLIST_NOREPORTLUN unless BLIST_REPORTLUN2 is
         * specifically set */
        if ((*bflags & BLIST_REPORTLUN2) == 0)
            *bflags |= BLIST_NOREPORTLUN;
    }

    /*
     * For a peripheral qualifier (PQ) value of 1 (001b), the SCSI
     * spec says: The device server is capable of supporting the
     * specified peripheral device type on this logical unit. However,
     * the physical device is not currently connected to this logical
     * unit.
     *
     * The above is vague, as it implies that we could treat 001 and
     * 011 the same. Stay compatible with previous code, and create a
     * scsi_device for a PQ of 1
     *
     * Don't set the device offline here; rather let the upper
     * level drivers eval the PQ to decide whether they should
     * attach. So remove ((inq_result[0] >> 5) & 7) == 1 check.
     */

    sdev->inq_periph_qual = (inq_result[0] >> 5) & 7;
    sdev->lockable = sdev->removable;
    sdev->soft_reset = (inq_result[7] & 1) && ((inq_result[3] & 7) == 2);

    /* capability bits taken from the SCSI level and INQUIRY bytes 7 and 56 */
    if (sdev->scsi_level >= SCSI_3 ||
            (sdev->inquiry_len > 56 && inq_result[56] & 0x04))
        sdev->ppr = 1;
    if (inq_result[7] & 0x60)
        sdev->wdtr = 1;
    if (inq_result[7] & 0x10)
        sdev->sdtr = 1;

    /* one-line summary of the device that was found */
    sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d "
            "ANSI: %d%s\n", scsi_device_type(sdev->type),
            sdev->vendor, sdev->model, sdev->rev,
            sdev->inq_periph_qual, inq_result[2] & 0x07,
            (inq_result[3] & 0x0f) == 1 ? " CCS" : "");

    /* tagged queueing unless the device is blacklisted with BLIST_NOTQ */
    if ((sdev->scsi_level >= SCSI_2) && (inq_result[7] & 2) &&
        !(*bflags & BLIST_NOTQ))
        sdev->tagged_supported = 1;

    /*
     * Some devices (Texel CD ROM drives) have handshaking problems
     * when used with the Seagate controllers. borken is initialized
     * to 1, and then set it to 0 here.
     */
    if ((*bflags & BLIST_BORKEN) == 0)
        sdev->borken = 0;

    if (*bflags & BLIST_NO_ULD_ATTACH)
        sdev->no_uld_attach = 1;

    /*
     * Apparently some really broken devices (contrary to the SCSI
     * standards) need to be selected without asserting ATN
     */
    if (*bflags & BLIST_SELECT_NO_ATN)
        sdev->select_no_atn = 1;

    /*
     * Maximum 512 sector transfer length
     * broken RA4x00 Compaq Disk Array
     */
    if (*bflags & BLIST_MAX_512)
        blk_queue_max_sectors(sdev->request_queue, 512);

    /*
     * Some devices may not want to have a start command automatically
     * issued when a device is added.
     */
    if (*bflags & BLIST_NOSTARTONADD)
        sdev->no_start_on_add = 1;

    if (*bflags & BLIST_SINGLELUN)
        sdev->single_lun = 1;

    sdev->use_10_for_rw = 1;

    if (*bflags & BLIST_MS_SKIP_PAGE_08)
        sdev->skip_ms_page_8 = 1;

    if (*bflags & BLIST_MS_SKIP_PAGE_3F)
        sdev->skip_ms_page_3f = 1;

    if (*bflags & BLIST_USE_10_BYTE_MS)
        sdev->use_10_for_ms = 1;

    /* set the device running here so that slave configure
     * may do I/O */
    scsi_device_set_state(sdev, SDEV_RUNNING);

    if (*bflags & BLIST_MS_192_BYTES_FOR_3F)
        sdev->use_192_bytes_for_3f = 1;

    if (*bflags & BLIST_NOT_LOCKABLE)
        sdev->lockable = 0;

    if (*bflags & BLIST_RETRY_HWERROR)
        sdev->retry_hwerror = 1;

    transport_configure_device(&sdev->sdev_gendev);

    /* optional per-device configuration hook of the host template */
    if (sdev->host->hostt->slave_configure) {
        int ret = sdev->host->hostt->slave_configure(sdev);
        if (ret) {
            /*
             * if LLDD reports slave not present, don't clutter
             * console with alloc failure messages
             */
            if (ret != -ENXIO) {
                sdev_printk(KERN_ERR, sdev,
                    "failed to configure device\n");
            }
            return SCSI_SCAN_NO_RESPONSE;
        }
    }

    /*
     * Ok, the device is now all set up, we can
     * register it and tell the rest of the kernel
     * about it.
     */
    /* skipped for asynchronous scans */
    if (!async && scsi_sysfs_add_sdev(sdev) != 0)
        return SCSI_SCAN_NO_RESPONSE;

    return SCSI_SCAN_LUN_PRESENT;
}

linux/drivers/scsi/scsi_sysfs.c

/**
 * scsi_sysfs_add_sdev - add scsi device to sysfs
 * @sdev:    scsi_device to add
 *
 * Puts the device into the SDEV_RUNNING state, adds its generic device
 * and class device, creates the queue_depth/queue_type attribute files
 * and any host specific attributes, and registers the bsg queue (a
 * failure of the latter is only logged).
 *
 * Return value:
 *     0 on Success / non-zero on Failure
 **/
int scsi_sysfs_add_sdev(struct scsi_device *sdev)
{
    int error, i;
    struct request_queue *rq = sdev->request_queue;

    if ((error = scsi_device_set_state(sdev, SDEV_RUNNING)) != 0)
        return error;

    error = device_add(&sdev->sdev_gendev);
    if (error) {
        /* drop a reference on the parent device before bailing out */
        put_device(sdev->sdev_gendev.parent);
        printk(KERN_INFO "error 1\n");
        return error;
    }

    error = class_device_add(&sdev->sdev_classdev);
    if (error) {
        printk(KERN_INFO "error 2\n");
        /* undo device_add() above */
        goto clean_device;
    }

    /* take a reference for the sdev_classdev; this is
     * released by the sdev_class .release */
    get_device(&sdev->sdev_gendev);

    /* create queue files, which may be writable, depending on the host */
    if (sdev->host->hostt->change_queue_depth)
        error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_depth_rw);
    else
        error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_depth);
    /* from here on a failure removes the whole device again */
    if (error) {
        __scsi_remove_device(sdev);
        goto out;
    }
    if (sdev->host->hostt->change_queue_type)
        error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_type_rw);
    else
        error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_type);
    if (error) {
        __scsi_remove_device(sdev);
        goto out;
    }

    error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL);

    if (error)
        sdev_printk(KERN_INFO, sdev,
                "Failed to register bsg queue, errno=%d\n", error);

    /* we're treating error on bsg register as non-fatal, so pretend
     * nothing went wrong */
    error = 0;

    /* add additional host specific attributes */
    if (sdev->host->hostt->sdev_attrs) {
        /* the attribute array is NULL terminated */
        for (i = 0; sdev->host->hostt->sdev_attrs[i]; i++) {
            error = device_create_file(&sdev->sdev_gendev,
                    sdev->host->hostt->sdev_attrs[i]);
            if (error) {
                __scsi_remove_device(sdev);
                goto out;
            }
        }
    }

    transport_add_device(&sdev->sdev_gendev);
 out:

    return error;

    /* reached only when class_device_add() failed */
 clean_device:
    scsi_device_set_state(sdev, SDEV_CANCEL);

    device_del(&sdev->sdev_gendev);
    transport_destroy_device(&sdev->sdev_gendev);
    put_device(&sdev->sdev_gendev);

    return error;
}

上面的device_add会触发执行本文开头提到的probe函数,因为篇幅原因,就不继续深入分析,我们直接分析sd模块的probe函数:


/**
 *    sd_probe - called during driver initialization and whenever a
 *    new scsi device is attached to the system. It is called once
 *    for each scsi device (not just disks) present.
 *    @dev: pointer to device object
 *
 *    Returns 0 if successful (or not interested in this scsi device 
 *    (e.g. scanner)); 1 when there is an error.
 *
 *    Note: this function is invoked from the scsi mid-level.
 *    This function sets up the mapping between a given 
 *    <host,channel,id,lun> (found in sdp) and new device name 
 *    (e.g. /dev/sda). More precisely it is the block device major 
 *    and minor number that is chosen here.
 *
 *    Assume sd_attach is not re-entrant (for time being)
 *    Also think about sd_attach() and sd_remove() running coincidentally.
 **/
static int sd_probe(struct device *dev)
{
    struct scsi_device *sdp = to_scsi_device(dev);
    struct scsi_disk *sdkp;
    struct gendisk *gd;
    u32 index;
    int error;

    error = -ENODEV;
    if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
        goto out;

    SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
                    "sd_attach\n"));

    error = -ENOMEM;
    sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
    if (!sdkp)
        goto out;

    gd = alloc_disk(16);
    if (!gd)
        goto out_free;

    if (!idr_pre_get(&sd_index_idr, GFP_KERNEL))
        goto out_put;

    spin_lock(&sd_index_lock);
    error = idr_get_new(&sd_index_idr, NULL, &index);
    spin_unlock(&sd_index_lock);

    if (index >= SD_MAX_DISKS)
        error = -EBUSY;
    if (error)
        goto out_put;

    sdkp->device = sdp;
    sdkp->driver = &sd_template;
    sdkp->disk = gd;
    sdkp->index = index;
    sdkp->openers = 0;

    if (!sdp->timeout) {
        if (sdp->type != TYPE_MOD)
            sdp->timeout = SD_TIMEOUT;
        else
            sdp->timeout = SD_MOD_TIMEOUT;
    }

    class_device_initialize(&sdkp->cdev);
    sdkp->cdev.dev = &sdp->sdev_gendev;
    sdkp->cdev.class = &sd_disk_class;
    strncpy(sdkp->cdev.class_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);

    if (class_device_add(&sdkp->cdev))
        goto out_put;

    get_device(&sdp->sdev_gendev);

    gd->major = sd_major((index & 0xf0) >> 4);
    gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
    gd->minors = 16;
    gd->fops = &sd_fops;

    if (index < 26) {
        sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
    } else if (index < (26 + 1) * 26) {
        sprintf(gd->disk_name, "sd%c%c",
            'a' + index / 26 - 1,'a' + index % 26);
    } else {
        const unsigned int m1 = (index / 26 - 1) / 26 - 1;
        const unsigned int m2 = (index / 26 - 1) % 26;
        const unsigned int m3 =  index % 26;
        sprintf(gd->disk_name, "sd%c%c%c",
            'a' + m1, 'a' + m2, 'a' + m3);
    }

    gd->private_data = &sdkp->driver;
    gd->queue = sdkp->device->request_queue;

    sd_revalidate_disk(gd);

    blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);

    gd->driverfs_dev = &sdp->sdev_gendev;
    gd->flags = GENHD_FL_DRIVERFS;
    if (sdp->removable)
        gd->flags |= GENHD_FL_REMOVABLE;

    dev_set_drvdata(dev, sdkp);
    add_disk(gd);

    sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
          sdp->removable ? "removable " : "");

    return 0;

 out_put:
    put_disk(gd);
 out_free:
    kfree(sdkp);
 out:
    return error;
}
这个函数分配了盘符等必须的信息,但是还是没有完成与用户空间的接口设置,在函数执行快结束的地方我们能看见函数add_disk, 这个函数并不是属于scsi层的函数,而是linux/block的函数,我们知道硬盘等都是块设备,而我们从最底层一直到现在还没有接触任何与此相关的内容,linux/driver/block 和 linux/block即是与块设备的操作逻辑,而scsi层与libata只是为block层提供具体操作的接口。后续文章我们将继续深入分析。 


块设备初始化分析(libata初始化分析 4)

我们先将前面的内容简单地回顾一下:整个初始化由sata驱动模块开始,通过对sata设备的注册,初始化libata层的结构;在初始化的过程中,初始化了scsi层需要的结构,并开启一个错误处理线程,该线程负责处理操作中出现的异常/错误,并负责确定是否需要对设备重新连接。因为现在整个系统刚刚初始化,还没有连接设备,所以通过该线程完成对设备的reset以及连接等操作。
在此过程中,初始化线程处于等待状态,在错误处理线程执行一个周期后,初始化线程继续执行,并由此开始初始化总线上的设备,手动触发sd.c模块进行probe处理,这里的probe我们有必要详细分析下,因为这里有一些重要的数据结构,这些结构对于我们后续的理解有重大的帮助。

linux/driver/scsi/sd.c
/**
 *    sd_probe - called during driver initialization and whenever a
 *    new scsi device is attached to the system. It is called once
 *    for each scsi device (not just disks) present.
 *    @dev: pointer to device object
 *
 *    Returns 0 if successful (or not interested in this scsi device 
 *    (e.g. scanner)); 1 when there is an error.
 *
 *    Note: this function is invoked from the scsi mid-level.
 *    This function sets up the mapping between a given 
 *    <host,channel,id,lun> (found in sdp) and new device name 
 *    (e.g. /dev/sda). More precisely it is the block device major 
 *    and minor number that is chosen here.
 *
 *    Assume sd_attach is not re-entrant (for time being)
 *    Also think about sd_attach() and sd_remove() running coincidentally.
 **/
static int sd_probe(struct device *dev)
{
    struct scsi_device *sdp = to_scsi_device(dev);    //(1)
    struct scsi_disk *sdkp;
    struct gendisk *gd;
    u32 index;
    int error;

    error = -ENODEV;
    if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
        goto out;

    SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
                    "sd_attach\n"));

    error = -ENOMEM;
    sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
    if (!sdkp)
        goto out;

    gd = alloc_disk(16);
    if (!gd)
        goto out_free;

    if (!idr_pre_get(&sd_index_idr, GFP_KERNEL))
        goto out_put;

    spin_lock(&sd_index_lock);
    error = idr_get_new(&sd_index_idr, NULL, &index);
    spin_unlock(&sd_index_lock);

    if (index >= SD_MAX_DISKS)
        error = -EBUSY;
    if (error)
        goto out_put;

    sdkp->device = sdp;
    sdkp->driver = &sd_template;
    sdkp->disk = gd;
    sdkp->index = index;
    sdkp->openers = 0;  // (2)

    if (!sdp->timeout) {
        if (sdp->type != TYPE_MOD)
            sdp->timeout = SD_TIMEOUT;
        else
            sdp->timeout = SD_MOD_TIMEOUT;
    }

    class_device_initialize(&sdkp->cdev);
    sdkp->cdev.dev = &sdp->sdev_gendev;
    sdkp->cdev.class = &sd_disk_class;
    strncpy(sdkp->cdev.class_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);

    if (class_device_add(&sdkp->cdev))
        goto out_put;

    get_device(&sdp->sdev_gendev);

    gd->major = sd_major((index & 0xf0) >> 4);
    gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
    gd->minors = 16;
    gd->fops = &sd_fops;
   //(3)

    if (index < 26) {
        sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
    } else if (index < (26 + 1) * 26) {
        sprintf(gd->disk_name, "sd%c%c",
            'a' + index / 26 - 1,'a' + index % 26);
    } else {
        const unsigned int m1 = (index / 26 - 1) / 26 - 1;
        const unsigned int m2 = (index / 26 - 1) % 26;
        const unsigned int m3 =  index % 26;
        sprintf(gd->disk_name, "sd%c%c%c",
            'a' + m1, 'a' + m2, 'a' + m3);
    }

    gd->private_data = &sdkp->driver;
    gd->queue = sdkp->device->request_queue;
  //(4)

    sd_revalidate_disk(gd);

    blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);

    gd->driverfs_dev = &sdp->sdev_gendev;
    gd->flags = GENHD_FL_DRIVERFS;
    if (sdp->removable)
        gd->flags |= GENHD_FL_REMOVABLE;

    dev_set_drvdata(dev, sdkp);
    add_disk(gd);

    sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
          sdp->removable ? "removable " : "");

    return 0;

 out_put:
    put_disk(gd);
 out_free:
    kfree(sdkp);
 out:
    return error;
}

我们先说明下第三部分,第三部分初始化了struct gendisk的设备号和一个重要的指针( gd->fops = &sd_fops;),我们先来看下这个结构体的初始化:

static struct block_device_operations sd_fops = {
    .owner            = THIS_MODULE,
    .open            = sd_open,
    .release        = sd_release,
    .ioctl            = sd_ioctl,
    .getgeo            = sd_getgeo,
#ifdef CONFIG_COMPAT
    .compat_ioctl        = sd_compat_ioctl,
#endif
    .media_changed        = sd_media_changed,
    .revalidate_disk    = sd_revalidate_disk,
};

这个结构体很像字符设备中的file_operations,同样有open、release函数,其实这就是块设备的接口函数:我们打开一个块设备时,系统最终会运行这里的open函数,至于这个函数的生效过程我们一会再分析。

第1 2 4部分要一起看,其实这里有一个块设备重要的操作函数,我们在上面的块设备操作函数没有看到read, write函数,因为块的读写是通过一个特殊的函数request函数实现的, 系统在文件系统层将需要的操作写入到相应设备的queue中,而这个queue中就包含这个request函数的指针,方便系统的最后调用,我们可以看到这里的gd->queue来自于probe函数的传入参数,我们可以看下之前的分析过程中的一个函数:

/linux/driver/scsi/scsi_scan.c

/**
 * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it
 * @starget:    pointer to target device structure
 * @lun:    LUN of target device
 * @sdevscan:    probe the LUN corresponding to this scsi_device
 * @sdevnew:    store the value of any new scsi_device allocated
 * @bflagsp:    store bflags here if not NULL
 *
 * Description:
 *     Call scsi_probe_lun, if a LUN with an attached device is found,
 *     allocate and set it up by calling scsi_add_lun.
 *
 * Return:
 *     SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
 *     SCSI_SCAN_TARGET_PRESENT: target responded, but no device is
 *         attached at the LUN
 *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
 **/
static int scsi_probe_and_add_lun(struct scsi_target *starget,
                  uint lun, int *bflagsp,
                  struct scsi_device **sdevp, int rescan,
                  void *hostdata)
{
    struct scsi_device *sdev;
    unsigned char *result;
    int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
    struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);

    /*
     * The rescan flag is used as an optimization, the first scan of a
     * host adapter calls into here with rescan == 0.
     */
    sdev = scsi_device_lookup_by_target(starget, lun);
    if (sdev) {
        if (rescan || sdev->sdev_state != SDEV_CREATED) {
            SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
                "scsi scan: device exists on %s\n",
                sdev->sdev_gendev.bus_id));
            if (sdevp)
                *sdevp = sdev;
            else
                scsi_device_put(sdev);

            if (bflagsp)
                *bflagsp = scsi_get_device_flags(sdev,
                                 sdev->vendor,
                                 sdev->model);
            return SCSI_SCAN_LUN_PRESENT;
        }
        scsi_device_put(sdev);
    } else
        sdev = scsi_alloc_sdev(starget, lun, hostdata);

    if (!sdev)
        goto out;

    result = kmalloc(result_len, GFP_ATOMIC |
            ((shost->unchecked_isa_dma) ? __GFP_DMA : 0));
    if (!result)
        goto out_free_sdev;

    if (scsi_probe_lun(sdev, result, result_len, &bflags))
        goto out_free_result;

    if (bflagsp)
        *bflagsp = bflags;
    /*
     * result contains valid SCSI INQUIRY data.
     */
    if (((result[0] >> 5) == 3) && !(bflags & BLIST_ATTACH_PQ3)) {
        /*
         * For a Peripheral qualifier 3 (011b), the SCSI
         * spec says: The device server is not capable of
         * supporting a physical device on this logical
         * unit.
         *
         * For disks, this implies that there is no
         * logical disk configured at sdev->lun, but there
         * is a target id responding.
         */
        SCSI_LOG_SCAN_BUS(2, sdev_printk(KERN_INFO, sdev, "scsi scan:"
                   " peripheral qualifier of 3, device not"
                   " added\n"))
        if (lun == 0) {
            SCSI_LOG_SCAN_BUS(1, {
                unsigned char vend[9];
                unsigned char mod[17];

                sdev_printk(KERN_INFO, sdev,
                    "scsi scan: consider passing scsi_mod."
                    "dev_flags=%s:%s:0x240 or 0x1000240\n",
                    scsi_inq_str(vend, result, 8, 16),
                    scsi_inq_str(mod, result, 16, 32));
            });
        }
        
        res = SCSI_SCAN_TARGET_PRESENT;
        goto out_free_result;
    }
这个函数在前面的文章曾经运行过一次,其中这里有相关queue的操作。


/**
 * scsi_alloc_sdev - allocate and setup a scsi_Device
 *
 * Description:
 *     Allocate, initialize for io, and return a pointer to a scsi_Device.
 *     Stores the @shost, @channel, @id, and @lun in the scsi_Device, and
 *     adds scsi_Device to the appropriate list.
 *
 * Return value:
 *     scsi_Device pointer, or NULL on failure.
 **/
static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
                       unsigned int lun, void *hostdata)
{
    struct scsi_device *sdev;
    int display_failure_msg = 1, ret;
    struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
    extern void scsi_evt_thread(struct work_struct *work);

    sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
               GFP_ATOMIC);
    if (!sdev)
        goto out;

    sdev->vendor = scsi_null_device_strs;
    sdev->model = scsi_null_device_strs;
    sdev->rev = scsi_null_device_strs;
    sdev->host = shost;
    sdev->id = starget->id;
    sdev->lun = lun;
    sdev->channel = starget->channel;
    sdev->sdev_state = SDEV_CREATED;
    INIT_LIST_HEAD(&sdev->siblings);
    INIT_LIST_HEAD(&sdev->same_target_siblings);
    INIT_LIST_HEAD(&sdev->cmd_list);
    INIT_LIST_HEAD(&sdev->starved_entry);
    INIT_LIST_HEAD(&sdev->event_list);
    spin_lock_init(&sdev->list_lock);
    INIT_WORK(&sdev->event_work, scsi_evt_thread);

    sdev->sdev_gendev.parent = get_device(&starget->dev);
    sdev->sdev_target = starget;

    /* usually NULL and set by ->slave_alloc instead */
    sdev->hostdata = hostdata;

    /* if the device needs this changing, it may do so in the
     * slave_configure function */
    sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED;

    /*
     * Some low level driver could use device->type
     */
    sdev->type = -1;

    /*
     * Assume that the device will have handshaking problems,
     * and then fix this field later if it turns out it
     * doesn't
     */
    sdev->borken = 1;

    sdev->request_queue = scsi_alloc_queue(sdev);
    if (!sdev->request_queue) {
        /* release fn is set up in scsi_sysfs_device_initialise, so
         * have to free and put manually here */
        put_device(&starget->dev);
        kfree(sdev);
        goto out;
    }

    sdev->request_queue->queuedata = sdev;
    scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);

    scsi_sysfs_device_initialize(sdev);

    if (shost->hostt->slave_alloc) {
        ret = shost->hostt->slave_alloc(sdev);
        if (ret) {
            /*
             * if LLDD reports slave not present, don't clutter
             * console with alloc failure messages
             */
            if (ret == -ENXIO)
                display_failure_msg = 0;
            goto out_device_destroy;
        }
    }

    return sdev;

out_device_destroy:
    transport_destroy_device(&sdev->sdev_gendev);
    put_device(&sdev->sdev_gendev);
out:
    if (display_failure_msg)
        printk(ALLOC_FAILURE_MSG, __FUNCTION__);
    return NULL;
}
这里初始化的sdev就是传入文章开始probe函数的参数,我们继续看queue的初始化。

/*
 * Build the request queue for @sdev: allocate it with scsi_request_fn as
 * the request function, then attach scsi_prep_fn and scsi_softirq_done.
 * Returns the queue, or NULL if the allocation failed.
 */
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
    struct request_queue *queue;

    queue = __scsi_alloc_queue(sdev->host, scsi_request_fn);
    if (queue) {
        blk_queue_prep_rq(queue, scsi_prep_fn);
        blk_queue_softirq_done(queue, scsi_softirq_done);
    }

    return queue;
}

这里的传入__scsi_alloc_queue函数的第二个参数就是我们要找的request函数,系统在读写设备会最终运行这个函数。


/*
 * Allocate a request queue driven by @request_fn and apply the limits
 * taken from @shost (segment counts, max sectors, bounce limit, segment
 * boundary, clustering).  Returns the queue, or NULL if blk_init_queue()
 * fails.
 */
struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
                     request_fn_proc *request_fn)
{
    struct request_queue *queue = blk_init_queue(request_fn, NULL);

    if (!queue)
        return NULL;

    /* Hardware-imposed limit on scatter/gather entries. */
    blk_queue_max_hw_segments(queue, shost->sg_tablesize);

    /*
     * sg chaining is expected to become mandatory, at which point the
     * ifdef can go; until every arch is converted, keep the safe path.
     */
#ifdef ARCH_HAS_SG_CHAIN
    blk_queue_max_phys_segments(queue, shost->use_sg_chaining ?
                    SCSI_MAX_SG_CHAIN_SEGMENTS :
                    SCSI_MAX_SG_SEGMENTS);
#else
    blk_queue_max_phys_segments(queue, SCSI_MAX_SG_SEGMENTS);
#endif

    blk_queue_max_sectors(queue, shost->max_sectors);
    blk_queue_bounce_limit(queue, scsi_calculate_bounce_limit(shost));
    blk_queue_segment_boundary(queue, shost->dma_boundary);

    if (!shost->use_clustering)
        clear_bit(QUEUE_FLAG_CLUSTER, &queue->queue_flags);

    return queue;
}
EXPORT_SYMBOL(__scsi_alloc_queue);
到这里,关于queue与request的由来,以及块设备的操作结构体,我们就分析完了,读者可以继续看看下面的具体实现。

下面我们要进入probe为之准备万分的一个重要函数: add_disk

/**
 * add_disk - add partitioning information to kernel list
 * @disk: per-device partitioning information
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
 *
 * The steps run in a fixed order: the disk is flagged GENHD_FL_UP, its
 * device-number range is registered, register_disk() is called, and
 * finally the disk's queue is registered.  Nothing is returned, so a
 * failure in any step is not reported to the caller.
 */
void add_disk(struct gendisk *disk)
{
    /* Mark the disk as up before it is registered anywhere. */
    disk->flags |= GENHD_FL_UP;
    /*
     * Register disk->minors device numbers starting at
     * (major, first_minor), with exact_match/exact_lock as callbacks
     * and the gendisk itself as their data.
     */
    blk_register_region(MKDEV(disk->major, disk->first_minor),
                disk->minors, NULL, exact_match, exact_lock, disk);
    register_disk(disk);
    blk_register_queue(disk);
}

EXPORT_SYMBOL(add_disk);
关于这个函数我在LDD3上看到这样的描述:

“一旦调用了add_disk,磁盘设备将被激活(可以理解为已经初始化完毕),并随时会调用它提供的方法。”从这里可以看出来,add_disk是块设备生效的关键的最后一步。

linux/fs/partitions/check.c  可以看出这个函数已经是文件系统层的。


/* Not exported, helper to add_disk(). */
void register_disk(struct gendisk *disk)
{
    struct block_device *bdev;
    char *s;
    int i;
    struct hd_struct *p;
    int err;

    kobject_set_name(&disk->kobj, "%s", disk->disk_name);
    /* ewww... some of these buggers have / in name... */
    s = strchr(disk->kobj.k_name, '/');
    if (s)
        *s = '!';
    if ((err = kobject_add(&disk->kobj)))
        return;
    err = disk_sysfs_symlinks(disk);
    if (err) {
        kobject_del(&disk->kobj);
        return;
    }
     disk_sysfs_add_subdirs(disk);

    /* No minors to use for partitions */
    if (disk->minors == 1)
        goto exit;

    /* No such device (e.g., media were just removed) */
    if (!get_capacity(disk))
        goto exit;

    bdev = bdget_disk(disk, 0);
    if (!bdev)
        goto exit;

    /* scan partition table, but suppress uevents */
    bdev->bd_invalidated = 1;
    disk->part_uevent_suppress = 1;
    err = blkdev_get(bdev, FMODE_READ, 0);
    disk->part_uevent_suppress = 0;
    if (err < 0)
        goto exit;
    blkdev_put(bdev);

exit:
    /* announce disk after possible partitions are already created */
    kobject_uevent(&disk->kobj, KOBJ_ADD);

    /* announce possible partitions */
    for (i = 1; i < disk->minors; i++) {
        p = disk->part[i-1];
        if (!p || !p->nr_sects)
            continue;
        kobject_uevent(&p->kobj, KOBJ_ADD);
    }
}
由于篇幅和时间问题,本系列文章就分析这么多,这里我只是分析了初始化的大体流程,读者可以选择性地阅读相关代码了解详细的流程。至于块设备的读写流程,我在网上发现了一篇很不错的文章,读者可以参考下,这里就不使用源码分析了。

谢谢大家的光临!!

原创粉丝点击