.NET Main函数参数解释过程和特殊规则

来源:互联网 发布:单片机与接口技术答案 编辑:程序博客网 时间:2024/04/27 17:53
最近正在准备一个小型开发工具的发布工作(具体的内容请容我先卖个关子,等发布之后我会详细给大家介绍的)。在使用时不经意中发现,使用.NET开发的命令行工具对引号 " 和反斜杠 \ 的解释规则和一般命令行程序有所不同。举例来说,如果你在命令行下输入:
 
C:\> tool "C:\Program Files\"
 

实际上传入的参数是 C:\Program Files" 。其实这里不仔细看可能发现不了问题。在原来的命令行中,第一个双引号代表一个参数的开始/结束,而后面的 \" 按照.NET的解释规则代表一个实际的双引号字符,而非参数的开始/结束,因此最后的结果会多出一个双引号,并且缺少一个反斜杠。

内部,CLR使用CommandLineToArgvW来分析程序的命令行分离出各个参数,这个函数有着特殊的解释规则:
1.     2n个反斜杠后面跟一个双引号:代表n个反斜杠,该双引号作为参数的开始/结束标记
2.     2n+1个反斜杠后面跟一个双引号:代表n个反斜杠加一个实际的双引号字符
3.     n个反斜杠后面不跟双引号:直接代表n个反斜杠
这个规则比较绕,用例子的方式来解释就是:
命令行参数
实际参数
"C:\Program Files\"
C:\Program Files"
"C:\Program Files\\"
C:\Program Files\
"C:\Program Files\\\"
C:\Program Files\"

因此,正确的方式是第二个,也就是 "C:\Program Files\\"

 
事实上,CLR内部并没有直接调用CommandLineToArgvW,而是自己实现了一个有着同等功能的函数SegmentCommandLine。在Rotor的源代码中可以找到它的实现,位于sscli20/clr/src/utilcode/util.cpp。CLR的主函数_CorExeMain在执行托管主函数之前会调用CorCommandLine::SetArgvW,这个函数会调用SegmentCommandLine来分析命令行(经过简化):
 
// Populate the cached argv array (m_ArgvW / m_NumArgs) from the raw command line.
//
// Only the first call does any work: once m_ArgvW is non-NULL, later calls
// return S_OK without re-parsing.
//
// Returns E_OUTOFMEMORY when SegmentCommandLine yields NULL; otherwise returns
// whatever ParseCor() reports.
/* static */
HRESULT CorCommandLine::SetArgvW(LPCWSTR lpCommandLine)
{
    // The command line was already split by an earlier call.
    if (m_ArgvW)
        return S_OK;

    // Split the command line into individual arguments.
    m_ArgvW = SegmentCommandLine(lpCommandLine, &m_NumArgs);
    if (!m_ArgvW)
        return E_OUTOFMEMORY;

    // CLR-specific command-line processing (per the original note, mostly
    // related to ClickOnce).
    return ParseCor();
}
 
 
真正在执行Main主函数的时候,ClassLoader::RunMain函数则会调用CorCommandLine::GetArgvW获得之前分析得到的参数列表,并创建对应的托管String数组并传递给Main(经过简化):
// Builds a managed string array from the native argument list returned by
// CorCommandLine::GetArgvW (skipping the first numSkipArgs entries), invokes
// the entry point described by pFD with that array, and stores the entry
// point's result in *piRetVal.
//
// NOTE: this is a simplified excerpt. The declarations of wzArgs,
// cCommandArgs, StrArgArray, arg and hr are not shown, hr is never assigned
// here, and stringArgs is not referenced in the visible code.
/* static */

HRESULT ClassLoader::RunMain(MethodDesc *pFD ,

                             short numSkipArgs,
                             INT32 *piRetVal,

                             PTRARRAYREF *stringArgs /*=NULL*/)

{
 

    wzArgs = CorCommandLine::GetArgvW(&cCommandArgs);

 

    // Allocate a managed array with one slot per argument that is not skipped

    StrArgArray = (PTRARRAYREF) AllocateObjectArray((cCommandArgs - numSkipArgs), g_pStringClass);

 

    // Create a managed string for each native argument and store it in the managed array

    for( arg = numSkipArgs; arg < cCommandArgs; arg++) {

        STRINGREF sref = COMString::NewString(wzArgs[arg]);

        StrArgArray->SetAt(arg-numSkipArgs, (OBJECTREF) sref);

    }

 
MethodDescCallSite threadStart(pFD); // prepare to call the main function (entry point) that the MethodDesc refers to
 

    ARG_SLOT stackVar = ObjToArgSlot(StrArgArray); // turn the array into the function argument

 

*piRetVal = (INT32)threadStart.Call_RetArgSlot(&stackVar); // call the main function (entry point)

 

    return hr;

}
 
而最关键的SegmentCommandLine函数代码则如下:
 
//---------------------------------------------------------------------

// Splits a command line into argc/argv lists, using the VC7 parsing rules.

//

// This functions interface mimics the CommandLineToArgvW api.

//
// If function fails, returns NULL.
//

// If function suceeds, call delete [] on return pointer when done.

//
//---------------------------------------------------------------------

LPWSTR *SegmentCommandLine(LPCWSTR lpCmdLine, DWORD *pNumArgs)

{

    STATIC_CONTRACT_NOTHROW;

    STATIC_CONTRACT_GC_NOTRIGGER;

    STATIC_CONTRACT_FAULT;

 
 

    *pNumArgs = 0;

 

    int nch = (int)wcslen(lpCmdLine);

 

    // Calculate the worstcase storage requirement. (One pointer for

    // each argument, plus storage for the arguments themselves.)

    int cbAlloc = (nch+1)*sizeof(LPWSTR) + sizeof(WCHAR)*(nch + 1);

    LPWSTR pAlloc = new (nothrow) WCHAR[cbAlloc / sizeof(WCHAR)];

    if (!pAlloc)

        return NULL;

 

    LPWSTR *argv = (LPWSTR*) pAlloc; // We store the argv pointers in the first halt

    LPWSTR pdst = (LPWSTR)( ((BYTE*)pAlloc) + sizeof(LPWSTR)*(nch+1) ); // A running pointer to second half to store arguments

    LPCWSTR psrc = lpCmdLine;

    WCHAR   c;

    BOOL    inquote;

    BOOL    copychar;

    int     numslash;

 

    // First, parse the program name (argv[0]). Argv[0] is parsed under

    // special rules. Anything up to the first whitespace outside a quoted

    // subtring is accepted. Backslashes are treated as normal characters.

    argv[ (*pNumArgs)++ ] = pdst;

    inquote = FALSE;

    do {

        if (*psrc == L'"' )

        {

            inquote = !inquote;
            c = *psrc++;
            continue;

        }

        *pdst++ = *psrc;

 

        c = *psrc++;

 

    } while ( (c != L'/0' && (inquote || (c != L' ' && c != L'/t'))) );

 

    if ( c == L'/0' ) {

        psrc--;

    } else {

        *(pdst-1) = L'/0';

    }

 

    inquote = FALSE;

 
 
 

    /* loop on each argument */

    for(;;)

    {

        if ( *psrc )

        {

            while (*psrc == L' ' || *psrc == L'/t')

            {
                ++psrc;
            }

        }

 

        if (*psrc == L'/0')

            break;              /* end of args */
 

        /* scan an argument */

        argv[ (*pNumArgs)++ ] = pdst;

 

        /* loop through scanning one argument */

        for (;;)

        {

            copychar = 1;

            /* Rules: 2N backslashes + " ==> N backslashes and begin/end quote

               2N+1 backslashes + " ==> N backslashes + literal "

               N backslashes ==> N backslashes */

            numslash = 0;
            while (*psrc == L'//')
            {

                /* count number of backslashes for use below */

                ++psrc;
                ++numslash;
            }
            if (*psrc == L'"')
            {

                /* if 2N backslashes before, start/end quote, otherwise

                   copy literally */
                if (numslash % 2 == 0)
                {
                    if (inquote)
                    {

                        if (psrc[1] == L'"')

                        {
                            psrc++;    /* Double quote inside quoted string */
                        }
                        else
                        {

                            /* skip first quote char and copy second */

                            copychar = 0;
                        }
                    }
                    else
                    {
                        copychar = 0;       /* don't copy quote */
                    }
                    inquote = !inquote;
                }
                numslash /= 2;          /* divide numslash by two */
            }

   

            /* copy slashes */
            while (numslash--)
            {
                *pdst++ = L'//';
            }

   

            /* if at end of arg, break loop */

            if (*psrc == L'/0' || (!inquote && (*psrc == L' ' || *psrc == L'/t')))

                break;

   

            /* copy character into argument */

            if (copychar)
            {
                *pdst++ = *psrc;
            }
            ++psrc;

        }

 

        /* null-terminate the argument */

 

        *pdst++ = L'/0';          /* terminate string */

    }

 

    /* We put one last argument in -- a null ptr */

    argv[ (*pNumArgs) ] = NULL;

 

    _ASSERTE((BYTE*)pdst <= (BYTE*)pAlloc + cbAlloc);

    return argv;

}
有关CLR执行Main函数过程的更多内容,我会在下篇Rotor源码研究中详细解释,敬请关注。


Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=1852691


原创粉丝点击