cp命令C实现

来源：互联网发布：篆书扫描识别软件编辑：程序博客网时间：2024/05/02 20:50

1.从有需求做起，不要盲目看书。

2.有计划，有步骤的学习。

3.主动学习，学会分享。

暂不说其他，我觉得第1条在我们学习过程中很值得注意。前段时间做了my_ls.c和my_shell.c。虽然都调试成功，但还是极大地参考了《Linux C编程实战》这本书。因为文件系统在Linux中占据着重要地位，所以还是得多通过实践来理解这部分的内容，大家不妨和我一起做my_cp，练习一下文件操作这部分的系统调用函数。

既然我们要实现cp命令，必须得了解cp命令的基本使用方法。这方面的资料网上很多，并且可以参考man手册自己试试，在此我不再详细说明。my_cp将要实现以下基本的功能：

1.将一个文件拷贝到指定路径。（目的文件可存在也可不存在）。这是my_cp中最基本的功能，事实上2，3，4功能最终都会被分解成此功能。

2.将一个目录拷贝到指定目录下，此时必须加-R或-r选项。

3.将多个目录拷贝到指定目录下，此时必须加-R或-r选项。

4.将多个文件拷贝到指定目录下。这里的源文件既可以包含目录文件，也可以包含普通文件。

个人建议，最好多在终端试试以上各种情况，重点看一看cp命令是如何处理非法命令的，这对我们下面的编程有帮助。

请注意，源文件中如果存在目录文件，并且输入的命令行参数未加-r或-R选项，则会自动忽略此源目录文件，而其他文件的拷贝不受影响。如：

1gues@huangwei-desktop:~/code/shell_command$ ls
2ls  ls1  my_cp  my_cp1  my_cp.c  my_ls.c  my_shell.c  newls.c  t.c  tdir  test  tfile.c  ttfile.c
3gues@huangwei-desktop:~/code/shell_command$ cp my_shell.c tdir/ test/
4cp: 略过目录"tdir/"
5gues@huangwei-desktop:~/code/shell_command$ ls test/
6my_shell.c

如果你完成了my_ls或者my_shell，你一定会知道，这类实现系统命令的程序在main函数中首先会对命令行参数进行解析，判断其合法性，然后再根据用户的输入（是否包含某选项或者参数个数等）来“引导”程序进入其他子函数，以便完成相应功能。想想my_ls程序，难道不是这样吗？

好了，我们开始吧！

首先进入主函数。由于本程序只支持-r或者-R选项，因此如果用户输入选项，我们来判断是否合法。如果合法让其标志param_r为真。由于-r或-R选项位置不一定限制在cp命令之后（事实上放在命令行参数末尾也可以），所以用index_r来保存其下标。实现代码如下：（因为插件原因，下面的代码在出现<或>符号的后面均加入了 \）

01char dest_path[PATH_MAX+1];
02char src_path[256][PATH_MAX+1];
03int i,k,num,src_num,index_r;
04struct stat buf;
05 
06//check the legality of the options,only -r or -R
07for(i=1;i<\argc;i++)
08{
09    if(argv[i][0]=='-')
10    {
11        if((!strcmp(argv[i],"-r")||!strcmp(argv[i],"-R")))
12        {
13            param_r=1;
14            index_r=i;
15        }
16        else
17        {
18            printf("my_cp:invalid options: %s\n",argv[i]);
19            exit(1);
20        }
21    }
22}

接下来计算命令行参数中（不含选项的）参数个数num以及源文件的个数src_num，这两个变量备用。如果num小于2，肯定不合法。上述要求都合法后，提取目标文件的路径。上面已经说过，因为-R和-r选项可以出现在参数末尾，因此得多加一次判断，并不能直接认为argv[argc-1]就是目标文件路径。

01if(param_r)
02    {
03        num=argc-1-1;
04        src_num=num-1;
05    }
06    else
07    {
08        num=argc-1;
09        src_num=num-1;
10    }
11    if(num<\2)
12    {
13        printf("my_cp: [option] (source) (dest)\n");
14        exit(1);
15    }
16    //extract the dest path
17    if(index_r!=argc-1)
18    {
19        strcpy(dest_path,argv[argc-1]);
20    }
21    else
22    {
23        strcpy(dest_path,argv[argc-2]);
24    }

接下来提取源文件的路径，由于源文件可以有多个，因此我们用一个字符串数组来存储源文件路径。

01//extract the src path
02k=0;
03for(i=1;i<\argc-1;i++)
04{
05    if(i==index_r&&param_r)
06        continue;
07    else
08    {
09        strcpy(src_path[k++],argv[i]);
10    }
11}

以上工作都做好后，我们可以“分流”了，即根据不同要求进入不同的子函数。我的“分流”原则是根据源文件数src_num。当其大于1时，说明源文件是多个。首先判断目的文件是否存在，如果存在那么接着判断它是否为一个目录文件，因为多个源文件不可能拷贝到一个非目录文件当中。
确定了目的文件是一个目录后，我们要对每一个源文件依次调用子函数进行“分流”。
这里有两个重要的子函数，cp_single函数针对这样的情况：将单个文件拷贝到另一文件。（功能1）。cp_directory函数针对这样的情况：将单个文件夹拷贝到指定目录（功能2）。进入这两个子函数的依据就是，如果源文件是一个文件进入前者，源文件是目录进入后者。而我们用for循环将上述两函数有效的结合起来，就可以实现上述功能的3，4。

01if(src_num>\1)
02{
03    if(stat(dest_path,&buf)==-1)
04    {
05        printf("my_cp: \"%s\" is not a directory.\n",dest_path);
06        exit(1);
07    }
08    //the dest path is valid
09    if(S_ISDIR(buf.st_mode))
10    {
11        strcpy(temp_dest_path,dest_path);
12        //the dest path is directory
13        for(i=0;i<\src_num;i++)
14        {
15            if(stat(src_path[i],&buf)==-1)
16            {
17                printf("my_cp: can't get file status of \"%s\" : no this file or directory.\n",src_path[i]);
18                continue;
19            }
20            //the src_path exist
21            if(!S_ISDIR(buf.st_mode))
22            {
23                cp_single(src_path[i],dest_path);
24            }
25            elseif(param_r)
26            {
27                cp_directory(src_path[i],dest_path);
28 
29            }
30            else
31            {
32                printf("my_cp: skip the directory: \"%s\".\n",src_path[i]);
33            }
34            strcpy(dest_path,temp_dest_path);
35 
36        }
37 
38    }
39    else
40    {
41        printf("my_cp: \"%s\" is not a directory.\n",dest_path);
42        exit(1);
43    }
44}
45else
46{
47    //The code here be omited
48}

我们接下来主要来看源文件数目为1的情况。其实这属于cp命令最基本的功能，cp_directory函数也会调用这个函数。
源文件存在的时候，如果源文件是一个目录并且有r或R选项，那么进入cp_directory函数（至于目标文件是否为目录，进入此函数可以判断）。如果源文件不是目录文件，那么进入cp_single函数即可。

01if(src_num>\1)
02{
03    //The code here be omited
04}
05else
06{
07    //only one src path
08    if(stat(src_path[0],&buf)==-1)
09    {
10        printf("my_cp: can't get file status of \"%s\" : no this file or directory.\n",src_path[0]);
11        exit(1);
12    }
13    if(S_ISDIR(buf.st_mode))
14    {
15        if(param_r)
16        {
17            cp_directory(src_path[0],dest_path);
18            exit(0);
19        }
20        else
21        {
22            printf("my_cp: skip the directory: \"%s\".\n",src_path[0]);
23            exit(1);
24        }
25    }
26    else
27            {
28        cp_single(src_path[0],dest_path);
29    }
30}

1.成功将一个已存在源文件复制到另一个指定文件名的文件中。

01gues@huangwei-desktop:~/code/shell_command$ ls
02cptest  dd  dd1  ed  ls  ls1  my_cp  my_cp1  my_cp.c  my_ls.c  my_shell.c  newls.c  tdir  test  tfile.c
03gues@huangwei-desktop:~/code/shell_command$ ./my_cp tfile.c ttfile.c
04gues@huangwei-desktop:~/code/shell_command$ ls -l
05总用量 124
06-rw-r--r-- 1 gues gues  7378 2010-06-22 23:58 my_ls.c
07-rw-r--r-- 1 gues gues  6271 2010-07-17 14:29 my_shell.c
08-rw-r--r-- 1 gues gues  7378 2010-07-25 17:20 newls.c
09drwxr-xr-x 2 gues gues  4096 2010-07-25 18:03 tdir
10drwxr-xr-x 3 gues gues  4096 2010-07-25 18:03 test
11-rw-r--r-- 1 gues gues  6271 2010-07-25 16:35 tfile.c
12-rw-r--r-- 1 gues gues  6271 2010-07-26 10:14 ttfile.c

2.将已存在的源文件拷贝到一个不存在的目录下，会提示错误信息。

1gues@huangwei-desktop:~/code/shell_command$ ./my_cp tfile.c ~/nothisdirectory/
2my_cp:can't create the file:"/home/gues/nothisdirectory/":it is a directory.

3.将不存在的源文件拷贝到一个目录或文件中，提示相应错误。这里的目标文件或指定目录是否存在不确定。因为只有一个源文件时，cp命令总先检查源文件是否存在。

1gues@huangwei-desktop:~/code/shell_command$ ./my_cp nothisfile ~/nothisdirectory
2my_cp: can't get file status of "nothisfile": no this file or directory.

4.成功将源文件拷贝到已存在的指定目录，由于指定路径没有文件名，因此目标文件名与源文件名相同。

1gues@huangwei-desktop:~/code/shell_command$ ./my_cp tfile.c ~/
2gues@huangwei-desktop:~/code/shell_command$ ls ~/
3code     Documents  EIOffice               EIOffice_Personal_Lin.tar.gz  Pictures   tfile.c  Yozo_Office
4cptest   Downloads  EIOfficelog.txt        examples.desktop              Public     tmp
5Desktop  edsionte   EIOffice_Personal_Lin  Music

5.之所以首先演示这些结果，是因为我们在编写cp_single函数的时候都要考虑到这些情况，加之路径相对灵活，可能少一个/就会产生不同的结果。比如下面的结果：

1gues@huangwei-desktop:~/code/shell_command$ ./my_cp tfile.c ~/nothisdirectory
2gues@huangwei-desktop:~/code/shell_command$ ls ~/
3code     Documents  EIOffice               EIOffice_Personal_Lin.tar.gz  nothisdirectory  Templates  Videos
4cptest   Downloads  EIOfficelog.txt        examples.desktop              Pictures         tfile.c    Yozo_Office
5Desktop  edsionte   EIOffice_Personal_Lin  Music

拷贝成功。这里我们输入的参数仅仅比2中输入的参数少一个/，为什么结果就大不相同？因为2中目标文件是一个不存在的目录（～/nothisdirectory/），而上面的命令是将已存在文件拷贝到已存在目录（～/）下，并且指定文件名为nothisdirectory。
好了，我们下面来分析代码。进入cp_single函数，我们将传递过来的路径拷贝到局部变量src_path和dest_path当中。因为cp_single函数可能在程序的一次运行中被调用多次，如果修改了传递过来的路径（指针）那么会导致下面的调用不正确。如果传递过来的源文件只是一个文件名，那么我们自动为其加上当前路径，这可以方便下面提取文件名。

01void cp_single(char*temp_src_path,char* temp_dest_path)
02{
03    structstat buf;
04    intlen;
05    charch[10],filename[PATH_MAX+1],dest_dir[PATH_MAX+1];
06    intfdrd,fdwt,i,j,k;
07    charsrc_path[PATH_MAX+1],dest_path[PATH_MAX+1];
08 
09    strcpy(src_path,temp_src_path);
10    strcpy(dest_path,temp_dest_path);
11    for(k=0;k<\strlen(src_path);k++)
12    {
13        if(src_path[k]=='/')
14        break;
15    }
16    chartemp_path[PATH_MAX+1]="./";
17    if(k==strlen(src_path))
18    {
19        strcat(temp_path,src_path);
20            strcpy(src_path,temp_path);
21    }
22 
23        //the following code be omited
24}

接着，从源文件路径中提取文件名。即提取最后一个/符号后面的字符串。

01//extract the file name from src_path
02for(i=strlen(src_path)-1;i>\0;i--)
03{
04    if(src_path[i]=='/')
05        break;
06}
07j=k=0;
08for(j=i;j<\strlen(src_path);j++)
09{
10    filename[k++]=src_path[j];
11}
12filename[k]='\0';

如果目标文件路径存在，并且不含文件名，那么这时候就用到了我们上面提取的源文件名，用strcat连接即可。当然在连接之前还要检查目标路径的末尾是否为/，如果是则删除，否则会连接成这样：existeddir//filename。当不存在此目标路径时，我们要检测这个路径的末尾是一个不存在的目录（上述举例2），还是一个已存在目录下不存在的文件（举例5）。我们先找到目标路径中出现的最后一个/，然后检测这个/之前的路径是否存在。比如对于路径～/existdirectory/nothisdirectory/nothisfile，我们需要检测的是～/existdirectory/nothisdirectory/是否存在，若不存在那就显示出错信息；如果存在，那么按照完整路径～/existdirectory/nothisdirectory/nothisfile打开文件即可。实现代码如下：

01//check the if dest path has exsited
02if(stat(dest_path,&buf)==0)
03{
04    //the dest_path exsited
05    if(S_ISDIR(buf.st_mode))
06    {
07        if(dest_path[strlen(dest_path)-1]=='/')
08            dest_path[strlen(dest_path)-1]='\0';
09        strcat(dest_path,filename);
10    }
11}
12else
13{
14    //the dest_path didn't exsit
15    for(i=strlen(dest_path)-1;i>=0;i--)
16    {
17        if(dest_path[i]=='/')
18            break;
19    }
20    if(i>=0)
21    {
22        strncpy(dest_dir,dest_path,i+1);
23            if(stat(dest_dir,&buf)==-1)
24                 {
25                printf("my_cp:accessing:\"%s\" :it is't a directory.\n",dest_path);
26                exit(1);
27                }
28    }
29 
30}

下面是cp命令和本程序运行结果的比较。

1gues@huangwei-desktop:~/code/shell_command$ ./my_cp tfile.c ~/nothisdirectory/nothisfile
2my_cp:accessing:"/home/gues/nothisdirectory/nothisfile":it is't a directory.
3gues@huangwei-desktop:~/code/shell_command$ cp tfile.c ~/nothisdirectory/nothisfile
4cp: 正在访问"/home/gues/nothisdirectory/nothisfile": 不是目录

完成上述功能，便可以进行真正的拷贝了。我们不仅要拷贝源文件的内容，还要拷贝相关文件属性，比如存取权限、用户ID、用户组ID等。下面的代码便是实现上述功能。如果你完成了my_ls，下面的代码并不难理解，在此不再赘述。

01//fistly the content which was read from srouce file will be write to dest file

02if((fdrd=open(src_path,O_RDONLY))==-1)

03{

04 perror("open");

05 exit(1);

06}

07if(lseek(fdrd,0,SEEK_END)==-1)

08{

09 perror("lseek");

10 exit(1);

11}

12if((len=lseek(fdrd,0,SEEK_CUR))==-1)

13{

14 perror("lseek");

15 exit(1);

16}

17if(lseek(fdrd,0,SEEK_SET)==-1)

18{

19 perror("lseek");

20 exit(1);

21}

22//open the dest file

23if((fdwt=open(dest_path,O_CREAT|O_TRUNC|O_RDWR,S_IRWXU))==-1)

24{

25 perror("open");

26 exit(1);

27}

28close(fdwt);

29if((fdwt=open(dest_path,O_WRONLY|O_APPEND))==-1)

30{

31 perror("open");

32 exit(1);

33}

34

35while(len-->\0)

36{

37 //write all characters to dest file

38 if(read(fdrd,ch,1)!=1)

39 {

40 perror("read");

41 exit(1);

42 }

43 if(write(fdwt,ch,1)!=1)

44 {

45 perror("write");

46 exit(1);

47 }

48

49}

50

51//get src file's attributes

52if(fstat(fdrd,&buf)==-1)

53{

54 perror("fstat");

55 exit(1);

56}

57//set the dset file's access right

58if(fchmod(fdwt,buf.st_mode)==-1)

59{

60 perror("fchmod");

61 exit(1);

62}

63//set file's user id and group id

64if(fchown(fdwt,buf.st_uid,buf.st_gid)==-1)

65{

66 perror("fchown");

67 exit(1);

68}

69close(fdwt);

70close(fdrd);

1.如果目标目录中的最低级目录不存在，则会新建这个目录，并把源目录中的所有文件拷贝到此新建的目录下。比如cp -r dir ./newdir。我们可以看到./newdir（这个路径中最低级的目录是newdir）在cp前是不存在的，但是cp后新建了这个目录，并且将dir中的所有文件拷贝到这个新建的目录下。

1gues@huangwei-desktop:~/code/shell_command$ ls
2cptest  ls   my_cp   my_cp.c  my_ls_plus    my_shell.c    nothisdirectory  tdir         test
3dir     ls1  my_cp1  my_ls.c  my_ls_plus.c  newdirectory  nothisfile       tdirmy_ls.c  ttfile.c
4gues@huangwei-desktop:~/code/shell_command$ ls dir
5ed  my_cp1  test  ttfile.c
6gues@huangwei-desktop:~/code/shell_command$ cp -r dir ./newdir
7gues@huangwei-desktop:~/code/shell_command$ ls newdir
8ed  my_cp1  test  ttfile.c

2.如果最低级的目标目录存在，则会将源目录（当然也包含源目录下的所有文件）拷贝到这个目标目录。我们仍执行上面那个命令：cp -r dir ./newdir。但是这次结果是不一样的，由于1的操作，newdir目录已经存在，这次cp后将dir目录拷贝到了已存在的newdir目录下(即./newdir/dir/)。

1gues@huangwei-desktop:~/code/shell_command$ ls newdir
2ed  my_cp1  test  ttfile.c
3gues@huangwei-desktop:~/code/shell_command$ cp ./dir -r ./newdir
4gues@huangwei-desktop:~/code/shell_command$ ls newdir
5dir  ed  my_cp1  test  ttfile.c

如果我说的还不够明白，你也可以自己亲自验证一下cp命令。

下面我们来详解。还是先保留传递过来的路径。然后如果源文件夹路径的末尾不是/，则添加。

01void cp_directory(char* original_src_path,char* original_dest_path)
02{
03    structstat buf;
04    DIR *dir;
05    structdirent *ptr;
06    charpath[PATH_MAX+1];
07    charsrc_path[PATH_MAX+1],dest_path[PATH_MAX+1];
08 
09    strcpy(src_path,original_src_path);
10    strcpy(dest_path,original_dest_path);
11 
12    if(src_path[strlen(src_path)-1]!='/')
13    {
14        strncat(src_path,"/",1);
15    }
16        //the following code be omited
17}

如果目标目录中最低级的目录不存在，则创建它。如果次低级目录也不存在，则在创建的时候就会发生错误。如果目标目录存在，并且是目录文件，那么就如同上面举例2中所述，我们需要将源路径中最低级的目录拷贝到目标目录中。这里面涉及到提取源路径中最低级的目录，以及将其连接在目标目录后等操作，这些都不难理解。注意当完成目标路径的拼接后，如果这个目录本身就存在，那么我们将其删除，再创建新目录。

01if(stat(dest_path,&buf)==-1)
02    {
03        //create a directory which name is dest_path
04        stat(src_path,&buf);
05        if(mkdir(dest_path,buf.st_mode)==-1)
06        {
07            printf("my_cp:create the directory \"%s\" error.\n",dest_path);
08            return;
09        }
10    }
11    else
12    {
13        //exist
14        if(!S_ISDIR(buf.st_mode))
15        {
16            printf("my_cp:the directory \"%s\" can't cover the no-directory \"%s\".\n",src_path,dest_path);
17            return;
18        }
19        else
20        {
21            if(dest_path[strlen(dest_path)-1]!='/')
22            {
23                strncat(dest_path,"/",1);
24            }
25            //extract the lowest directory
26            inti,k=0;
27            charlowestdir[PATH_MAX+1];
28            for(i=strlen(src_path)-1-1;i>\0;i--)
29            {
30                if(src_path[i]=='/')
31                {
32                    i=i+1;
33                    break;
34                }
35            }
36 
37            for(;i<\strlen(src_path);i++)
38            {
39                lowestdir[k++]=src_path[i];
40            }
41            strncat(dest_path,lowestdir,strlen(lowestdir));
42            structstat temp_buf;
43            chartemp_path[PATH_MAX+1]="rm -rf ";
44            if(stat(dest_path,&temp_buf)==0)
45            {
46                strcat(temp_path,dest_path);
47                system(temp_path);
48            }
49                    if(mkdir(dest_path,buf.st_mode)==-1)
50                {
51                printf("my_cp:create the directory \"%s\" error.\n",dest_path);
52                    return;
53                }
54        }
55    }

接着我们打开源目录，读取其下的所有文件名。这个方法在my_ls的时候就已经使用过。我们将这些文件名与目的路径拼接后，检查他们是否是目录文件。如果是普通文件那么就调用cp_single函数，否则调用cp_directory函数。

01if((dir=opendir(src_path))==NULL)
02{
03    printf("my_cp:open the srouce path \"%s\" error.\n",src_path);
04    return;
05}
06char temp_dest_path[PATH_MAX+1];
07strcpy(temp_dest_path,dest_path);
08while((ptr=readdir(dir))!=NULL)
09{
10    if(!strcmp(ptr->\d_name,"."))
11        continue;
12    if(!strcmp(ptr->\d_name,".."))
13        continue;
14    strcpy(path,src_path);
15    strcat(path,ptr->\d_name);
16    if(stat(path,&buf)==-1)
17    {
18        printf("my_cp:open the file \"%s\" error.\n",path);
19        return;
20    }
21    strcpy(dest_path,temp_dest_path);
22    //get the right dest_path
23    if(S_ISDIR(buf.st_mode))
24    {
25        cp_directory(path,dest_path);
26    }
27    else
28    {
29        cp_single(path,dest_path);
30    }
31}

其实这是一个递归的过程，对于递归，最重要的是能返回到调用函数。对于任何目录，最终要么这个目录是空的，要么全是普通文件，所以肯定能返回到上一级函数中，不会无限的去嵌套。

0 0