nrgrep函数分析(2)--搜索参数探讨

来源:互联网 发布:levirebrushed mac 编辑:程序博客网 时间:2024/06/05 07:40

为了更好地了解nrgrep程序是如何实现各类搜索的(精确的简单搜索、扩展搜索、正则表达式搜索,以及近似的简单搜索、扩展搜索、正则表达式搜索等),并弄清各个参数是如何定义的、各自有什么功能,我们在此部分对程序中的参数作了详细的叙述与分析(参考nr-grep.pdf第八章A Pattern Matching Software)。

 

./nrgrep [-iclGhnvdbmskL] <pattern> <list of files>

 

一、后缀参数Options功能分析

 

-i: the search is case insensitive,该参数取消大小写敏感;

Bool OptCaseInsensitive = true;

 

-w: only matches whole words,仅输出匹配整个模式串的单词;

Bool OptWholeWord = true;

 

-x: only matches whole records,仅输出整条记录与模式串完全匹配的记录(记录默认以换行符分隔,即一行);

Bool OptWholeRecord = true;

 

-c: just counts the matches, does not print them,只打印匹配数;

Bool OptRecPrint = false;

 

-l: output filenames only, not their contents,输出包含匹配模式串的文件名;

Bool OptRecFiles = true;

 

-G: output whole files,输出包含匹配模式串的文件所有内容

Bool OptRecPrintFiles = true;

 

-h: do not output file names,不输出文件名(-hl 会只输出匹配数)

Bool OptRecFileNames = false;

 

-n: output records preceded by record number,在输出的每条记录前加上其记录号(行号)

Bool OptRecNumber = true;

 

-v: report nonmatching records,输出非匹配的行

OptRecPositive = false;

 

-d <delim>: sets the record delimiter to <delim>,设置记录分隔符<delim>,默认为\n

{

             byte *OptRecPatt = optarg; /* opts[1]; <delim> */

              OptRecPos = 0;

              if (OptRecPatt[strlen(OptRecPatt)-1] == '#')

             { OptRecPos = strlen(OptRecPatt)-1;

               OptRecPatt[OptRecPos] = 0;

             }

              if (strlen(OptRecPatt) == 1)

             OptRecChar = OptRecPatt[0];

              else OptRecChar = -1;

 

}

 

-b <bufsize>: sets the buffer size to <bufsize> in Kb. Default is 65536.

Int OptBufSize = atoi(optarg); /* atoi(opts[1]); */

 

-m <bits>: sets the maximum table sizes to 2^<bits> words. Default is 16.

{

              i = atoi(optarg); /* atoi(opts[1]); */

              if ((i<=0) || (i>W) || (W % i))

             { warn2("The number of bits must be between 1 and %i "

                "and divide %i, after -m",W,W);

             }

              else OptDetWidth = i;

}

 

-s <sep>: sets the output record separator to <sep>, 设置行的分隔符,用来标志与区分行;

{

               OptRecSep = malloc (strlen(optarg));

               i = 0; j = 0;

               while (optarg[i]) /* opts[1] */

              OptRecSep[j++] = getAchar (optarg,&i);

               OptRecSep[j] = 0;

}

 

-k <err>[idst]: allow up to <err> errors in the matches

         [idst] means permitting ins, del, subs, transp operations

         (default is all)

    {

              if (optarg[0] && !isdigit(optarg[strlen(optarg)-1]))

             { OptIns = OptDel = OptSubs = OptTransp = false;

                   do { switch (optarg[strlen(optarg)-1])

                   { case 'i': OptIns = true; break;

                     case 'd': OptDel = true; break;

                     case 's': OptSubs = true; break;

                     case 't': OptTransp = true; break;

                     default: error0 ("<num>[idst] expected after -k");

                       }

                optarg[strlen(optarg)-1] = 0;

                  }

                   while (optarg[0] && !isdigit(optarg[strlen(optarg)-1]));

             }

              OptErrors = atoi(optarg); /* atoi(opts[1]); */

    }

 

-L: take pattern literally (no special characters)

 

 

二、模式的语法分析

1.简单模式

    简单模式就是一串字符,可以使用转义符,如'\t'、'\n'、'\xdd'(dd为两位十六进制数)

 

2.扩展模式

支持使用中括号[]来匹配[]内的其中一个字符,[]内可以用'A-Z'这样的写法表示一个字符范围,'^'代表求补;'?'代表其前面的字符是可选的(出现0次或1次);'*'代表其前面的字符可以出现0到多次,'+'表示可以出现1到多次;'#'代表任意分隔符(any separator),'.'代表任意字符(any character)

 

3.正则表达式

  Finally, the most complex patterns that can be expressed are the regular

  expressions, which also permit the union operator '|' (e.g. 'abc|de' matches

  the strings 'abc' and 'de') and the parenthesis '(' ')' to enclose

  subexpressions, so that '?', '*' and '+' can be applied to complete

  expressions and not only letters, e.g. 'ab(cd|e)*fg?h'.

原创粉丝点击