awk&sed精要

来源：互联网发布：有了域名能干什么编辑：程序博客网时间：2024/05/17 03:14

[root@centos-fuwenchao ~]# awk /root/ /etc/passwdroot:x:0:0:root:/root:/bin/bashoperator:x:11:0:operator:/root:/sbin/nologin[root@centos-fuwenchao ~]# awk -F: /root/print $1 /etc/passwdawk: /root/printawk:       ^ syntax error[root@centos-fuwenchao ~]# awk -F: /root/{print $1} /etc/passwdawk: cmd. line:1: /root/{printawk: cmd. line:1:             ^ unexpected newline or end of string[root@centos-fuwenchao ~]# awk -F: '/root/{print $1}' /etc/passwdrootoperator[root@centos-fuwenchao ~]# awk -F: 'begin {print "wenchao"}/root/{print $1}' /etc/passwdrootoperator[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1}' /etc/passwdwenchaorootoperator[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1}end{print "wenchaojieshufu"}' /etc/passwdwenchaorootoperator[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1}END{print "wenchaojieshufu"}' /etc/passwdwenchaorootoperatorwenchaojieshufu[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1$7}END{print "wenchaojieshufu"}' /etc/passwdwenchaoroot/bin/bashoperator/sbin/nologinwenchaojieshufu[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1\T$7}END{print "wenchaojieshufu"}' /etc/passwdawk: BEGIN {print "wenchao"}/root/{print $1\T$7}END{print "wenchaojieshufu"}awk:                                       ^ backslash not last character on line[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1\t$7}END{print "wenchaojieshufu"}' /etc/passwdawk: BEGIN {print "wenchao"}/root/{print $1\t$7}END{print "wenchaojieshufu"}awk:                                       ^ backslash not last character on line[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print "$1\t$7"}END{print "wenchaojieshufu"}' /etc/passwdwenchao$1      $7$1      $7wenchaojieshufu[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wen\tchao"}/root/{print "$1\t$7"}END{print "wenchao\tjieshufu"}' /etc/passwdwen     chao$1      $7$1   
   $7wenchao jieshufu[root@centos-fuwenchao ~]#

参见我的另外博客：http://fuwenchao.blog.51cto.com/6008712/1341500

[root@centos-fuwenchao tmp]# awk -f blank num.txt this is blank linethis is blank linethis is blank linethis is blank linethis is blank line[root@centos-fuwenchao tmp]# more blank /^$/{print "this is blank line"}[root@centos-fuwenchao tmp]#

# test for integer, string or empty line./[0-9]+/ { print "That is an integer" }/[A-Za-z]+/ { print "This is a string" }/^$/ { print "This is a blank line." }

[root@centos-fuwenchao tmp]# awk -f blankint 4That is an integer5That is an integertThis is a stringgThis is a stringThis is a blank line.^C[root@centos-fuwenchao tmp]# more blankint # test for integer, string or empty line./[0-9]+/ { print "That is an integer" }/[A-Za-z]+/ { print "This is a string" }/^$/ { print "This is a blank line." }[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# echo a b c d | awk 'BEGIN { one = 1; two = 2 } { print $(one + two) }'c[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f nameaddr.awk  nameaddrJohnRobinson,KorenInc.,9784th Ave.,Boston,MAPhyllisChapman,GVECorp.,34Sea Drive,Amesbury,MA01881,879-0900[root@centos-fuwenchao tmp]# more nameaddr.awk { print "" # output blank lineprint $1 # nameprint $2 # companyprint $3 # streetprint $4, $5 # city, state zip}[root@centos-fuwenchao tmp]# more nameaddrJohn Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900[root@centos-fuwenchao tmp]# awk -F , -f nameaddr.awk  nameaddrJohn RobinsonKoren Inc.978 4th Ave.Boston MA 01760Phyllis ChapmanGVE Corp.34 Sea DriveAmesbury MA01881[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f nameaddr.awk nameaddrJohn RobinsonKoren Inc.978 4th Ave.Boston MA 01760Phyllis ChapmanGVE Corp.34 Sea DriveAmesbury MA01881[root@centos-fuwenchao tmp]# more nameaddr.awk BEGIN {FS=","}{ print "" # output blank lineprint $1 # nameprint $2 # companyprint $3 # streetprint $4, $5 # city, state zip}[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk '/John/' nameaddrJohn Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987[root@centos-fuwenchao tmp]# awk '~/John/' nameaddrawk: ~/John/awk: ^ syntax error[root@centos-fuwenchao tmp]# awk '$1 ~ /John/' nameaddrJohn Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987[root@centos-fuwenchao tmp]# awk '$1 !~ /John/' nameaddrPhyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900[root@centos-fuwenchao tmp]# awk '$2 !~ /John/' nameaddrJohn Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900[root@centos-fuwenchao tmp]# awk '$2 ~ /John/' nameaddr[root@centos-fuwenchao tmp]# more namename: No such file or directory[root@centos-fuwenchao tmp]# more nameaddrJohn Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk '/^$/{x++} END{print x}' num.txt 5[root@centos-fuwenchao tmp]# more num.txt 12345678[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f culcu.awk culfile john 87.4andrea 86jasper 85.6[root@centos-fuwenchao tmp]# more culcu.awk { total = $2 + $3 + $4 + $5 + $6avg = total / 5print $1, avg }[root@centos-fuwenchao tmp]# more culfile john 85 92 78 94 88andrea 89 90 75 90 86jasper 84 88 80 92 84[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao home]# lltotal 16drwx------.  6 oracle      oinstall 4096 Mar 12 01:50 oracledrwx------.  6 testuser    testuser 4096 Nov  5  2013 testuserdrwx------.  8 userwenchao groupq   4096 Nov  5  2013 userwenchaodrwx------. 27         500      500 4096 Nov  5  2013 wenchao[root@centos-fuwenchao home]# ls -l $* | awk '{print $5, "\t", $9}'         4096     oracle4096     testuser4096     userwenchao4096     wenchao[root@centos-fuwenchao home]#

[root@centos-fuwenchao home]# ll |awk -f /tmp/filesize.awk BYTES    FILE         oracle   4096testuser         4096userwenchao      4096wenchao          4096Total:  16384 bytes (5 files)[root@centos-fuwenchao home]# more /tmp/filesize.awk BEGIN { print "BYTES", "\t", "FILE" }{sum += $5++filenumprint $9, "\t", $5}END { print "Total: ", sum, "bytes (" filenum " files)" }[root@centos-fuwenchao home]# lltotal 16drwx------.  6 oracle      oinstall 4096 Mar 12 01:50 oracledrwx------.  6 testuser    testuser 4096 Nov  5  2013 testuserdrwx------.  8 userwenchao groupq   4096 Nov  5  2013 userwenchaodrwx------. 27         500      500 4096 Nov  5  2013 wenchao

[root@centos-fuwenchao home]# ll |awk -f /tmp/filesize.awk BYTES    FILE         oracle   4096testuser         4096userwenchao      4096wenchao          4096Total:  16384 bytes (5 files)[root@centos-fuwenchao home]# lltotal 16drwx------.  6 oracle      oinstall 4096 Mar 12 01:50 oracledrwx------.  6 testuser    testuser 4096 Nov  5  2013 testuserdrwx------.  8 userwenchao groupq   4096 Nov  5  2013 userwenchaodrwx------. 27         500      500 4096 Nov  5  2013 wenchao[root@centos-fuwenchao home]# more /tmp/filesize.awk BEGIN { print "BYTES", "\t", "FILE" }{sum += $5filenum++print $9, "\t", $5}END { print "Total: ", sum, "bytes (" filenum " files)" }[root@centos-fuwenchao home]#

[root@centos-fuwenchao oracle]# lltotal 56-rw-r--r--.  1 oracle oinstall    44 Mar 12 00:32 afiedt.buf-rw-r--r--.  1 oracle oinstall  2093 Mar 11 11:12 demobld.sql-rw-r--r--.  1 oracle oinstall   783 Feb 26 11:16 file_spool.lst-rw-r--r--.  1 oracle oinstall   479 Mar 11 11:32 login.sqldrwxr-xr-x. 19 root   root      4096 Mar 11 11:29 oracleDSTomdrwxr-x---.  3 oracle oinstall  4096 Feb 19 15:32 oradiag_oracle-rw-r--r--.  1 oracle oinstall   524 Feb 26 13:37 save.sql-rw-r-----.  1 oracle oinstall 16890 Feb 19 16:07 sum-rw-r--r--.  1 oracle oinstall   491 Feb 26 12:39 tempwenchaodb.txt-rw-r--r--.  1 oracle oinstall    10 Feb 26 10:33 wen.sql[root@centos-fuwenchao oracle]# ls -l $* | awk '> # filesum: list files and total size in bytes> # input: long listing produced by "ls -l"> #1 output column headers> BEGIN { print "BYTES", "\t", "FILE" }> #2 test for 9 fields; files begin with "-"> NF == 9 && /^-/ {> sum += $5 # accumulate size of file> ++filenum # count number of files> print $5, "\t", $9 # print size and filename> }> #3 test for 9 fields; directory begins with "d"> NF == 9 && /^d/ {> print "<dir>", "\t", $9 # print <dir> and name> }> #4 test for ls -lR line ./dir:> $1 ~ /^\..*:$/ {> print "\t" $0 # print that line preceded by tab> }> #5 once all is done,> END {> # print total file size and number of files> print "Total: ", sum, "bytes (" filenum " files)"> }'BYTES    FILE44       afiedt.buf2093     demobld.sql783      file_spool.lst479      login.sql<dir>    oracleDSTom<dir>    oradiag_oracle524      save.sql16890    sum491      tempwenchaodb.txt10       wen.sqlTotal:  21314 bytes (8 files)[root@centos-fuwenchao oracle]#

ls -l $* | awk '# filesum: list files and total size in bytes# input: long listing produced by "ls -l"#1 output column headersBEGIN { print "BYTES", "\t", "FILE" }#2 test for 9 fields; files begin with "-"NF == 9 && /^-/ {sum += $5 # accumulate size of file++filenum # count number of filesprint $5, "\t", $9 # print size and filename}#3 test for 9 fields; directory begins with "d"NF == 9 && /^d/ {print "<dir>", "\t", $9 # print <dir> and name}#4 test for ls -lR line ./dir:$1 ~ /^\..*:$/ {print "\t" $0 # print that line preceded by tab}#5 once all is done,END {# print total file size and number of filesprint "Total: ", sum, "bytes (" filenum " files)"}'

printf ( format-expression [, arguments] )
c   ASCII character
d   Decimal integer
i   Decimal integer. (Added in POSIX)
e   Floating-point format ([-]d.precisione[+-]dd)
E   Floating-point format ([-]d.precisionE[+-]dd)
f   Floating-point format ([-]ddd.precision)
g   e or f conversion, whichever is shortest, with trailing zeros removed
G   E or f conversion, whichever is shortest, with trailing zeros removed
o   Unsigned octal value
s   String
x   Unsigned hexadecimal number. Uses a-f for 10 to 15
X   Unsigned hexadecimal number. Uses A-F for 10 to 15
%   Literal %
printf("%d\t%s\n", $5, $9)

%-width.precision format-specifier

[root@centos-fuwenchao tmp]# awk '{printf("|%10s|\n", "hello")}' num.txt|     hello||     hello||     hello||     hello||     hello||     hello||     hello||     hello||     hello||     hello||     hello||     hello||     hello|[root@centos-fuwenchao tmp]# awk '{printf("%10s\n", "hello")}' num.txt     hello     hello     hello     hello     hello     hello     hello     hello     hello     hello     hello     hello     hello[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk '{printf("|%-10s|\n", "hello")}' num.txt|hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     ||hello     |[root@centos-fuwenchao tmp]#

The precision modifier, used for decimal or floating-point values, controls the number of digits that
appear to the right of the decimal point. For string values, it controls the maximum number of characters
from the string that will be printed. Note that the default precision for the output of numeric values is
"%.6g".
You can specify both the width and precision dynamically, via values in the printf or sprintf
argument list. You do this by specifying asterisks, instead of literal values.

printf("%*.*g\n", 5, 3, myvar);
In this example, the width is 5, the precision is 3, and the value to print will come from myvar.
The default precision used by the print statement when outputting numbers can be changed by setting
the system variable OFMT. For instance, if you are using awk to write reports that contain dollar values,
you might prefer to change OFMT to "%.2f".
Using the full syntax of the format expression can solve the problem with filesum of getting fields and headings properly aligned.

[root@centos-fuwenchao tmp]# awk  '{print high "\t" low}' high=100 low=60 num.txt 100     60100     60100     60100     60100     60100     60100     60100     60100     60100     60100     60100     60100     60[root@centos-fuwenchao tmp]#

$ awk -f scriptfile high=100 low=60 datafile
Inside the script, these two variables are available and can be accessed as any awk variable. If you were
to put this script in a shell script wrapper, then you could pass the shell's command-line arguments as
values. (The shell makes available command-line arguments in the positional variables - $1 for the first
parameter, $2 for the second, and so on.)For instance, look at the shell script version of the previous
command:
[13] Careful! Don't confuse the shell's parameters with awk's field variables.
awk -f scriptfile "high=$1" "low=$2" datafile
If this shell script were named awket, it could be invoked as:
$ awket 100 60
"100" would be $1 and passed as the value assigned to the variable high.
In addition, environment variables or the output of a command can be passed as the value of a variable.
Here are two examples:
awk '{ ... }' directory=$cwd file1 ...
awk '{ ... }' directory=`pwd` file1 ...

$ awk '{ print NR, $0 }' OFS='. ' names1. Tom 656-57892. Dale 653-21333. Mary 543-11224. Joe 543-2211

[root@centos-fuwenchao tmp]# awk 'BEGIN { print n }{if (n == 1) print "Reading the first file"if (n == 2) print "Reading the second file"}' n=1 num.txt n=2 culcu.awk Reading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the first fileReading the second fileReading the second fileReading the second file[root@centos-fuwenchao tmp]# more num.txt 12345678[root@centos-fuwenchao tmp]# more culcu.awk { total = $2 + $3 + $4 + $5 + $6avg = total / 5print $1, avg }[root@centos-fuwenchao tmp]#

解释：

There are four command-line parameters: "n=1," "test," "n=2," and "test2". Now, if you remember that a
BEGIN procedure is "what we do before processing input," you'll understand why the reference to n in
the BEGIN procedure returns nothing. So the print statement will print a blank line. If the first
parameter were a file and not a variable assignment, the file would not be opened until the BEGIN
procedure had been executed.

The variable n is given an initial value of 1 from the first parameter. The second parameter supplies the
name of the file. Thus, for each line in test, the conditional "n == 1" will be true. After the input is
exhausted from test, the third parameter is evaluated, and it sets n to 2. Finally, the fourth parameter
supplies the name of a second file. Now the conditional "n == 2" in the main procedure will be true.

One consequence of the way parameters are evaluated is that you cannot use the BEGIN procedure to
test or verify parameters that are supplied on the command line. They are available only after a line of
input has been read. You can get around this limitation by composing the rule "NR == 1" and using its
procedure to verify the assignment. Another way is to test the command-line parameters in the shell
script before invoking awk.

POSIX awk provides a solution to the problem of defining parameters before any input is read. The -v
option[14] specifies variable assignments that you want to take place before executing the BEGIN
procedure (i.e., before the first line of input is read.) The -v option must be specified before a command-line
script. For instance, the following command uses the -v option to set the record separator for
multiline records.
[14] The -v option was not part of the original (1987) version of nawk (still used on
SunOS 4.1.x systems and some System V Release 3.x systems). It was added in 1989 after
Brian Kernighan of Bell Labs, the GNU awk authors, and the authors of MKS awk agreed
on a way to set variables on the command line that would be available inside the BEGIN
block. It is now part of the POSIX specification for awk.
$ awk -F"\n" -v RS="" '{ print }' phones.block
A separate -v option is required for each variable assignment that is passed to the program.

--传参

[root@centos-fuwenchao tmp]# ./acron.sh BASICBASIC Beginner's All-Purpose Symbolic Instruction Code[root@centos-fuwenchao tmp]# more acron.sh #! /bin/sh# assign shell's $1 to awk search variableawk '$1 == search' search=$1 acronyms[root@centos-fuwenchao tmp]# more acronyms BASIC Beginner's All-Purpose Symbolic Instruction CodeCICS Customer Information Control SystemCOBOL Common Business Oriented LanguageDBMS Data Base Management SystemGIGO Garbage In, Garbage OutGIRL Generalized Information Retrieval Language[root@centos-fuwenchao tmp]#

Notice that we tested the parameter as a string ($1 == search). We could also have written this as a regular
expression match ($1 ~ search).

== 要求完全一样，如果你输入 BASI 是没有任何打印的

~ 包含，上面的可以打印！

Conditionals, Loops, and Arrays

condition

if ( expression )
    action1
[else
    action2]

Remember that "==" is a relational operator and "=" is an assignment operator. We can also test whether
x matches a pattern using the pattern-matching operator "~":
if ( x ~ /[yY](es)?/ ) print x

if (avg >= 90) grade = "A"
else if (avg >= 80) grade = "B"
else if (avg >= 70) grade = "C"
else if (avg >= 60) grade = "D"
else grade = "F"

expr ? action1 : action2

grade = (avg >= 65) ? "Pass" : "Fail"

loop

while (condition)
action

i = 1while ( i <= 4 ) {print $i++i}

do
action
while (condition)

BEGIN {do {++xprint x} while ( x <= 4 )}

[root@centos-fuwenchao tmp]# awk -f do.awk 12345[root@centos-fuwenchao tmp]# more do.awk BEGIN {do {++xprint x} while ( x <= 4 )}[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f prinum.awk tesenum 3 : 1302 : 1154 : 1141 : 108[root@centos-fuwenchao tmp]# more prinum.awk { total=i= 0do {++itotal += $i} while ( total <= 100 )print i, ":", total}[root@centos-fuwenchao tmp]# more tesenum 45 25 60 2010 105 50 4033 5 9 67108 3 5 4[root@centos-fuwenchao tmp]#

for

for ( set_counter ; test_counter ; increment_counter )
action

set_counter
Sets the initial value for a counter variable.
test_counter
States a condition that is tested at the top of the loop.
increment_counter
Increments the counter each time at the bottom of the loop, right before testing the test_counter
again.

total = $2 + $3 + $4 + $5 + $6
avg = total / 5

total = 0for (i = 2; i <= NF; ++i)total += $iavg = total / (NF - 1)

[root@centos-fuwenchao tmp]# ./cheng.sh Enter number: 8The factorial of 8 is 40320[root@centos-fuwenchao tmp]# ./cheng.sh Enter number: 99The factorial of 99 is 9.33262e+155[root@centos-fuwenchao tmp]# more cheng.sh #!/bin/bashawk '# factorial: return factorial of user-supplied numberBEGIN {printf("Enter number: ")}# check that user enters a number$1 ~ /^[0-9]+$/ {# assign value of $1 to number & factnumber = $1if (number == 0)fact = 1elsefact = number# loop to multiply fact*x until x = 1for (x = number - 1; x > 1; x--)fact *= xprintf("The factorial of %d is %g\n", number, fact)# exit -- saves user from typing CRTL-D.exit}# if not a number, prompt again.{ printf("\nInvalid entry. Enter a number: ")}'  [root@centos-fuwenchao tmp]#

脚本的最后（} 后面）可以显式地使用 - 表示从标准输入中读取数据；不写时默认也是从标准输入读取！

array

flavor_count = 5
for (x = 1; x <= flavor_count; ++x)
print flavor[x]

END {
for ( x = 1; x <= NR; x++ )
class_avg_total += student_avg[x]
class_average = class_avg_total / NR
for ( x = 1; x <= NR; x++ )
if (student_avg[x] >= class_average)
++above_average
else
++below_average

print "Class Average: ", class_average
print "At or Above Average: ", above_average
print "Below Average: ", below_average
}

for ( item in acro )
print item, acro[item]

[root@centos-fuwenchao tmp]# awk 'BEGIN { data[1.23] = "3.21";  printf "<%s>\n", data[1.23] }'<3.21>[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f grade.awk grademona    79      Cjohn    88      Bandrea  90.5    Ajasper  85      Bdunce   64.5    Dellis   93.5    AClass Average:  83.4167At or Above Average:    4Below Average:  2A:      2B:      2C:      1D:      1[root@centos-fuwenchao tmp]# more grademona 70 77 85 83 70 89john 85 92 78 94 88 91andrea 89 90 85 94 90 95jasper 84 88 80 92 84 82dunce 64 80 60 60 61 62ellis 90 98 89 96 96 92[root@centos-fuwenchao tmp]# more grade.awk # grades.awk -- average student grades and determine# letter grade as well as class averages.# $1 = student name; $2 - $NF = test scores.# set output field separator to tab.BEGIN { OFS = "\t" }# action applied to all input lines{# add up gradestotal = 0for (i = 2; i <= NF; ++i)total += $i# calculate averageavg = total / (NF - 1)# assign student's average to element of arraystudent_avg[NR] = avg# determine letter gradeif (avg >= 90) grade = "A"else if (avg >= 80) grade = "B"else if (avg >= 70) grade = "C"else if (avg >= 60) grade = "D"else grade = "F"# increment counter for letter grade array++class_grade[grade]# print student name, average and letter gradeprint $1, avg, grade}# print out class statisticsEND {# calculate class averagefor (x = 1; x <= NR; x++)class_avg_total += student_avg[x]class_average = class_avg_total / NR# determine how many above/below averagefor (x = 1; x <= NR; x++)if (student_avg[x] >= class_average)++above_averageelse++below_average# print resultsprint ""print "Class Average: ", class_averageprint "At or Above Average: ", above_averageprint "Below Average: ", below_average# print number of students per letter gradefor (letter_grade in class_grade)print letter_grade ":", class_grade[letter_grade] | "sort"}[root@centos-fuwenchao tmp]#

解释：

However, an array makes this task much easier. We can define an array called class_grade, and
simply use the letter grade (A through F) as the index to the array.
++class_grade[grade]
Thus, if the grade is an "A" then the value of class_grade["A"] is incremented by one. At the end
of the program, we can print out these values in the END rule using the special for loop:
for (letter_grade in class_grade)
print letter_grade ":", class_grade[letter_grade] |
"sort"
The variable letter_grade references a single subscript of the array class_grade each time
through the loop. The output is piped to sort, to make sure the grades come out in the proper order.
(Piping output to programs is discussed in Chapter 10, The Bottom Drawer.) Since this is the last
addition we make to the grades.awk script, we can look at the full listing.

经典：输入参数，求出对应值，应用于目录索引

[root@centos-fuwenchao tmp]# more lookup.sh awk '# lookup -- reads local glossary file and prompts user for query#0BEGIN {  OFS = "\t"# prompt userprintf("Enter a glossary term: ")}#1 read local file named glossaryFILENAME == "glossary" {# load each glossary entry into an arrayentry[$1] = $2next}#2 scan for command to exit program$0 ~ /^(quit|[qQ]|exit|[Xx])$/ { exit }#3 process any non-empty line$0 != "" {if ( $0 in entry ) {# it is there, print definitionprint entry[$0]} elseprint $0 " not found"}#4 prompt user again for another term{printf("Enter another glossary term (q to quit): ")}' glossary -[root@centos-fuwenchao tmp]# more glossary BASIC Beginner's All-Purpose Symbolic Instruction CodeCICS Customer Information Control SystemCOBOL Common Business Oriented LanguageDBMS Data Base Management SystemGIGO Garbage In, Garbage OutGIRL Generalized Information Retrieval Language[root@centos-fuwenchao tmp]# ./lookup.sh Enter a glossary term: GIGOGarbageEnter another glossary term (q to quit): BASICBeginner'sEnter another glossary term (q to quit): WENCHAOWENCHAO not foundEnter another glossary term (q to quit): Q[root@centos-fuwenchao tmp]#

解释：lookup.sh 最后的 - 意味着从标准输入中读取参数，它保存在 $0 中；entry 类似于 entry[BASIC]=Beginner's，是文件中的项，而不是 shell 中的传参！

官方解释：

Once input from glossary is exhausted, awk reads from standard input because "-" is specified on the
command line. Standard input is where the user's response comes from. Rule #3 tests that the input line
($0) is not empty. This rule should match whatever the user types. The action uses in to see if the input
line is an index in the array. If it is, it simply prints out the corresponding value. Otherwise, we tell the
user that no valid entry was found.
After rule #3, rule #4 will be evaluated. This rule simply prompts the user for another entry. Note that
regardless of whether a valid entry was processed in rule #3, rule #4 is executed. The prompt also tells
the user how to quit the program. After this rule, awk looks for the next line of input.

那rule1中的next是什么作用呢？

我把next删掉，运行看下是什么效果！

[root@centos-fuwenchao tmp]# ./lookup.sh Enter a glossary term: BASIC Beginner's All-Purpose Symbolic Instruction Code not foundEnter another glossary term (q to quit): CICS Customer Information Control System not foundEnter another glossary term (q to quit): COBOL Common Business Oriented Language not foundEnter another glossary term (q to quit): DBMS Data Base Management System not foundEnter another glossary term (q to quit): GIGO Garbage In, Garbage Out not foundEnter another glossary term (q to quit): GIRL Generalized Information Retrieval Language not foundEnter another glossary term (q to quit): GIGOGarbageEnter another glossary term (q to quit): BASICBeginner'sEnter another glossary term (q to quit):

官方解释：

where $1 is the term and $2 is the definition. The next statement at the end of rule #1 is used to skip
other rules in the script and causes a new line of input to be read. So, until all the entries in the glossary
file are read, no other rule is evaluated.

现在具体解释一下这个脚本

首先是 BEGIN：它定义了输出分隔符。你也可以定义输入分隔符，像这样：

BEGIN { FS = "\t"; OFS = "\t"
# prompt user
printf("Enter a glossary term: ")
}

接着

在#1这里：检查当前输入的文件是否是 glossary，如果是的话则通过 entry[term] = definition 载入数组，注意 { 前不用 &&

当文件载入完全之后从标准输入中读取一个项，具体是靠next实现的，详细看我的另外博文！

从标准输入中读入的项目依次和 #2、#3 匹配

if 输入的是q 则退出

if 输入的不为空，则进入代码块进行 if 判断，如果输入的项=entry索引项，则打印索引值，否则打印该项not found，接着运行到 #4，打印 Enter another glossary term (q to quit): ，

打印完了之后，接着等待接受标准输入的输入

（sed & awk P315）

具体解释看我的另外博文！

ps：

< Less than
> Greater than
<= Less than or equal to
>= Greater than or equal to
== Equal to
!= Not equal to
~ Matches
!~ Does not match

split

n = split(string, array, separator)

string is the input string to be parsed into elements of the named array. The array's indices start at 1 and
go to n, the number of elements in the array. The elements will be split based on the specified separator
character. If a separator is not specified, then the field separator (FS) is used. The separator can be a full
regular expression, not just a single character. Array splitting behaves identically to field splitting

z = split($1, array, " ")for (i = 1; i <= z; ++i)print i, array[i]

This shell script takes the first argument from the command line and echoes it as input to the awk
program.

echo $1 |awk '# romanum -- convert number 1-10 to roman numeral# define numerals as list of roman numerals 1-10BEGIN {# create array named numerals from list of romannumeralssplit("I,II,III,IV,V,VI,VII,VIII,IX,X", numerals,",")}# look for number between 1 and 10$1 > 0 && $1 <= 10 {# print specified elementprint numerals[$1]exit}{ print "invalid number"exit}'--$ romanum 4IV

日期转换

awk '# date-month -- convert mm/dd/yy or mm-dd-yy to month day,year# build list of months and put in array.BEGIN {# the 3-step assignment is done for printing in booklistmonths = "January,February,March,April,May,June,"listmonths = listmonths "July,August,September,"listmonths = listmonths "October,November,December"split(listmonths, month, ",")}# check that there is input$1 != "" {# split on "/" the first input field into elements of arraysizeOfArray = split($1, date, "/")# check that only one field is returnedif (sizeOfArray == 1)# try to split on "-"sizeOfArray = split($1, date, "-")# must be invalidif (sizeOfArray == 1)exit# add 0 to number of month to coerce numeric typedate[1] += 0# print month day, yearprint month[date[1]], (date[2] ", 19" date[3])}'

---

$ echo "5/11/55" | date-month
May 11, 1955

解释倒数第四行 date[1] += 0

However, before using date[1], we coerce the type of date[1] by adding 0 to it. While awk will correctly interpret "11" as
a number, leading zeros may cause a number to be treated as a string. Thus, "06" might not be
recognized properly without type coercion. The element referenced by date[1] is used as the
subscript for month.

简言之：就是string转换为number，要不然使用month[date[1] ]的时候会出错

删除数组元素

delete array[subscript]

http://www.cnblogs.com/chengmo/archive/2010/10/10/1846991.html

awk 分析web日志（页面执行时间）(常见应用3)

http://www.cnblogs.com/chengmo/archive/2010/06/28/1766876.html

nginx日志访问次数最多及最耗时的页面(慢查询）

http://www.cnblogs.com/chengmo/archive/2010/10/11/1847515.html

awk 运算符（算术运算符，赋值运算符，关系运算符，逻辑运算符，正则运算符）说明

0 0