LINUX下PING与TCP_IP协议栈
来源:互联网 发布:剑网三霸刀捏脸数据 编辑:程序博客网 时间:2024/05/20 17:24
框架如下
PING程序
A.使用的SOCKET接口
1. socket
2. sendto
3. recvfrom
B.PING地址:127.0.0.1
TCP/IP协议栈:
1. IP层
2. ICMP层
/***********************************************************
* 名称:myping.c *
* 说明:本程序用于演示ping命令的实现原理 *
***********************************************************/
#include <string.h>
#include <strings.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netdb.h>
#include <setjmp.h>
#include <errno.h>
#define PACKET_SIZE 4096
#define MAX_WAIT_TIME 5
#define MAX_NO_PACKETS 3
/* Buffer in which pack() builds the outgoing ICMP message. */
char sendpacket[PACKET_SIZE];
/* Buffer that recv_packet() fills with the received datagram and hands to unpack(). */
char recvpacket[PACKET_SIZE];
/* sockfd: raw socket created in main(); datalen: number of bytes added to the 8-byte ICMP header in pack(). */
int sockfd,datalen=56;
/* Counters maintained by send_packet() and recv_packet(), reported by statistics(). */
int nsend=0,nreceived=0;
/* Destination address filled in by main() and used by sendto(). */
struct sockaddr_in dest_addr;
/* Process id stored by main(); used as the ICMP identifier in pack() and checked in unpack(). */
pid_t pid;
/* Source address of the most recently received datagram (written by recvfrom()). */
struct sockaddr_in from;
void statistics(int signo);
unsigned short cal_chksum(unsigned short *addr,int len);
int pack(int pack_no);
void send_packet(void);
void recv_packet(void);
int unpack(char *buf,int len);
/* NOTE(review): tv_sub is declared here but no definition or call is visible in this listing. */
void tv_sub(struct timeval *out,struct timeval *in);
/*
 * Print the transmit/receive summary, close the socket and terminate.
 *
 * Installed as the SIGALRM handler by recv_packet() and also called directly
 * from main() once all replies have arrived.
 *
 * signo: signal number (unused).
 * Does not return: the process exits with status 1, as before.
 */
void statistics(int signo)
{
    int lost_percent = 0;

    (void)signo;

    /*
     * Multiply before dividing: (nsend-nreceived)/nsend*100 truncates to 0
     * for every partial loss. Also avoid dividing by zero when nothing was
     * sent.
     */
    if (nsend > 0)
        lost_percent = (nsend - nreceived) * 100 / nsend;

    printf("\n--------------------PING statistics-------------------\n");
    /* The old format "%%%d lost" printed e.g. "%33 lost"; emit "33% lost". */
    printf("%d packets transmitted, %d received , %d%% lost\n",
           nsend, nreceived, lost_percent);
    close(sockfd);
    exit(1);
}
/*校验和算法*/
/*
 * Compute the 16-bit one's-complement Internet checksum over len bytes.
 *
 * addr: start of the data, read as a sequence of 16-bit words.
 * len:  number of bytes to cover; may be odd.
 *
 * Returns the complement of the folded sum. Running the function over data
 * that already contains a correct checksum therefore yields 0.
 */
unsigned short cal_chksum(unsigned short *addr,int len)
{
    /* Unsigned accumulator: carries collect in the upper 16 bits without
     * signed-overflow or sign-extension problems when shifting. */
    unsigned int sum = 0;
    unsigned short *w = addr;
    unsigned short answer = 0;
    int nleft = len;

    /* Add the data up 16 bits at a time. */
    while (nleft > 1) {
        sum += *w++;
        nleft -= 2;
    }

    /*
     * Odd length: copy the trailing byte into the first byte (in memory
     * order) of a zeroed 16-bit word, i.e. pad the data with one zero byte,
     * and add that word.
     */
    if (nleft == 1) {
        *(unsigned char *)(&answer) = *(unsigned char *)w;
        sum += answer;
    }

    /* Fold the carries back into the low 16 bits (twice, in case the first
     * fold itself carries), then complement. */
    sum = (sum >> 16) + (sum & 0xffff);
    sum += (sum >> 16);
    answer = (unsigned short)~sum;
    return answer;
}
/*设置ICMP报头*/
/*
 * Build an ICMP echo request in the global sendpacket buffer.
 *
 * pack_no: sequence number stored in the icmp_seq field.
 *
 * The message is the 8-byte ICMP header followed by datalen bytes; the
 * current time of day is stored at the start of the data area.
 * Returns the total message length in bytes (8 + datalen).
 */
int pack(int pack_no)
{
    struct icmp *icmp = (struct icmp *)sendpacket;
    struct timeval now;
    int packsize = 8 + datalen;

    icmp->icmp_type = ICMP_ECHO;
    icmp->icmp_code = 0;
    icmp->icmp_cksum = 0;           /* must be zero while the checksum is computed */
    icmp->icmp_seq = pack_no;
    /* The identifier field is 16 bits wide; keep the truncation explicit. */
    icmp->icmp_id = pid & 0xffff;

    /* Copy the timestamp in rather than writing through a cast pointer:
     * sendpacket is a char array with no alignment guarantee for timeval. */
    gettimeofday(&now, NULL);
    memcpy(icmp->icmp_data, &now, sizeof(now));

    icmp->icmp_cksum = cal_chksum((unsigned short *)icmp, packsize);
    return packsize;
}
/*发送三个ICMP报文*/
/*
 * Send MAX_NO_PACKETS ICMP echo requests to dest_addr.
 *
 * nsend is incremented before each attempt and doubles as the sequence
 * number passed to pack(). sendto() arguments: the socket, the buffer holding
 * the data to send, its length, flags, the destination address and the size
 * of that address. After a successful send the function sleeps one second;
 * after a failed send it reports the error and moves on immediately.
 */
void send_packet()
{
    while (nsend < MAX_NO_PACKETS) {
        int length;
        ssize_t sent;

        nsend += 1;
        length = pack(nsend);
        sent = sendto(sockfd, sendpacket, length, 0,
                      (struct sockaddr *)&dest_addr, sizeof(dest_addr));
        if (sent >= 0) {
            sleep(1);   /* one request per second */
        } else {
            perror("sendto error");
        }
    }
}
/*接收所有ICMP报文*/
/*
 * Receive datagrams until one accepted reply has been counted for every
 * request that was sent.
 *
 * statistics() is installed as the SIGALRM handler and the alarm is re-armed
 * to MAX_WAIT_TIME seconds before every recvfrom(), so a missing reply ends
 * the program through that handler.
 */
void recv_packet()
{
    signal(SIGALRM, statistics);

    while (nreceived < nsend) {
        /* recvfrom() overwrites the length with the actual address size, so
         * it has to be reset for every call; the type must be socklen_t. */
        socklen_t fromlen = sizeof(from);
        int n;

        alarm(MAX_WAIT_TIME);
        n = recvfrom(sockfd, recvpacket, sizeof(recvpacket), 0,
                     (struct sockaddr *)&from, &fromlen);
        if (n < 0) {
            /* An interrupted call is simply retried without a message. */
            if (errno != EINTR)
                perror("recvfrom error");
            continue;
        }

        /* Ignore anything unpack() does not accept. */
        if (unpack(recvpacket, n) == -1)
            continue;
        nreceived++;
    }
}
/*剥去ICMP报头*/
int unpack(char *buf,int len)
{
int i,iphdrlen;
struct ip *ip;
struct icmp *icmp;
ip = (struct ip *)buf;
iphdrlen = ip->ip_hl << 2; /*求ip报头长度,即ip报头的长度标志乘4*/
icmp = (struct icmp *)(buf+iphdrlen); /*越过ip报头,指向ICMP报头*/
len -= iphdrlen; /*ICMP报头及ICMP数据报的总长度*/
if( len < 8) /*小于ICMP报头长度则不合理*/
{
printf("ICMP packets\'s length is less than 8\n");
return -1;
}
/*确保所接收的是自己发的ICMP的回应*/
if( (icmp->icmp_type == ICMP_ECHOREPLY) && (icmp->icmp_id == pid) )
{
/*显示相关信息*/
printf("%d byte from %s: icmp_seq=%u ttl=%d \n",
len,
inet_ntoa(from.sin_addr),
icmp->icmp_seq,
ip->ip_ttl
);
}
else
return -1;
}
int main(int argc,char *argv[])
{
struct hostent *host;
struct protoent *protocol;
unsigned long int inaddr = 0;
int waittime=MAX_WAIT_TIME;
int size=50*1024;
//检测参数是否过少
if(argc<2)
{
printf("usage:%s hostname/IP address\n",argv[0]);
exit(1);
}
//getprotobyname()返回对应于给定协议名的包含名字和协议号的protoent结构指针
//结构的成员有:
//成员 用途
//p_name 正规的协议名。
//p_aliases 一个以空指针结尾的可选协议名队列。
//p_proto 以主机字节顺序排列的协议号
if( (protocol=getprotobyname("icmp") )==NULL)
{
perror("getprotobyname");
exit(1);
}
/*生成使用ICMP的原始套接字,这种套接字只有root用户才能生成*/
if( (sockfd = socket(AF_INET,SOCK_RAW,protocol->p_proto) ) < 0)
{
perror("socket error");
exit(1);
}
/* 回收root权限,设置当前用户权限*/
setuid(getuid());
//初始化dest_addr
bzero(&dest_addr,sizeof(dest_addr));
//设置协议家族类型为 AF_INET
dest_addr.sin_family = AF_INET;
/*判断是主机名还是ip地址*/
if( inaddr = inet_addr(argv[1]) == INADDR_NONE)
{
//通过dns取得ip地址
if((host = gethostbyname(argv[1]) )==NULL) /*是主机名*/
{
perror("gethostbyname error");
exit(1);
}
memcpy( (char *)&dest_addr.sin_addr,host->h_addr,host->h_length);
}
else
{ /*是ip地址*/
inaddr = inet_addr(argv[1]);
memcpy( (char *)&dest_addr.sin_addr,(char *)&inaddr,sizeof(inaddr));
}
/*获取main的进程id,用于设置ICMP的标志符*/
pid=getpid();
printf("PING %s(%s): %d bytes data in ICMP packets.\n",argv[1],inet_ntoa(dest_addr.sin_addr),datalen);
send_packet(); /*发送所有ICMP报文*/
recv_packet(); /*接收所有ICMP报文*/
statistics(SIGALRM); /*进行统计*/
return 0;
}
PING的流程在上面已经有详细的注释了,我就不说了
PING程序的主要流程分为3个步骤
1. 建立一个socket结构 ->socket
2. 用这个socket发送ICMP包 ->sendto
3. 用这个socket接收ICMP包 ->recvfrom
由于是PING本机,所以在TCP/IP协议栈中会有4个部分的内容
1. 建立socket
2. 通过socket发送ICMP包
3. 本机收到ICMP包后发送应答
4. 通过socket接收ICMP包
下面我们就来进入TCP/IP协议栈来看看这3个系统调用如何为我们的PING程序服务的
首先是第1部分,建立一个socket结构
sockfd = socket(AF_INET,SOCK_RAW,protocol->p_proto)
这个函数会执行系统调用sys_socketcall
sys_socketcall在/net/socket.c中
/*
 * Multiplexer for the socket system calls: copies the caller's argument
 * array from user space into a[] and dispatches on `call` to the matching
 * sys_* function. This excerpt shows only the three cases the ping program
 * uses; the dotted lines mark omitted cases.
 * Returns the result of the selected call, or -EINVAL / -EFAULT.
 */
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
unsigned long a[6];
unsigned long a0, a1;
int err;
// Reject call numbers outside 1..SYS_RECVMSG
if (call < 1 || call > SYS_RECVMSG)
return -EINVAL;
/* copy_from_user should be SMP safe. */
// Copy the arguments from user space into a[]; nargs[call] is the copy size
if (copy_from_user(a, args, nargs[call]))
return -EFAULT;
// Pass the argument count and the copied arguments to audit_socketcall; stop if it reports an error
err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
if (err)
return err;
a0 = a[0];
a1 = a[1];
switch (call) {
case SYS_SOCKET:
err = sys_socket(a0, a1, a[2]);
break;
.........................
case SYS_SENDTO:
err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], a[5]);
break;
...............................
case SYS_RECVFROM:
err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4],
(int __user *)a[5]);
break;
default:
err = -EINVAL;
break;
}
return err;
}
上面只列出了我们所用到的3个case
现在我们的目标是case SYS_SOCKET,也就是要创建一个socket了
sys_socket在/net/socket.c中
/*
 * Create a socket with sock_create() and map it with sock_map_fd().
 * Returns the value produced by sock_map_fd() on success, or the negative
 * error from whichever step failed; the socket is released if mapping fails.
 */
asmlinkage long sys_socket(int family, int type, int protocol)
{
int retval;
struct socket *sock;
// Create the socket
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
goto out;
// Map the socket to a file descriptor
retval = sock_map_fd(sock);
if (retval < 0)
goto out_release;
out:
/* It may be already another descriptor 8) Not kernel problem. */
return retval;
out_release:
sock_release(sock);
return retval;
}
很简单的调用
sock_create在/net/socket.c中
/*
 * Thin wrapper: calls __sock_create() with the current task's network
 * namespace (current->nsproxy->net_ns) and 0 for the `kern` argument.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
继续,进入到__sock_create中
/*
 * Create a socket of the given family/type/protocol for namespace `net` and
 * hand it back through *res.
 * Validates family and type, allocates the socket, looks up the registered
 * protocol family and lets its ->create() callback initialise the socket.
 * `kern` is forwarded to the security hooks.
 * Returns 0 on success or a negative errno; on failure after allocation the
 * socket is released.
 */
static int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
int err;
struct socket *sock;
const struct net_proto_family *pf;
/*
* Check protocol is in range
*/
// Reject address families outside [0, NPROTO)
if (family < 0 || family >= NPROTO)
return -EAFNOSUPPORT;
// Reject socket types outside [0, SOCK_MAX)
if (type < 0 || type >= SOCK_MAX)
return -EINVAL;
/* Compatibility.
This uglymoron is moved from INET layer to here to avoid
deadlock in module load.
*/
// The obsolete (PF_INET, SOCK_PACKET) pair is reported once and remapped to PF_PACKET
if (family == PF_INET && type == SOCK_PACKET)
{
static int warned;
if (!warned)
{
warned = 1;
printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
current->comm);
}
family = PF_PACKET;
}
err = security_socket_create(family, type, protocol, kern);
if (err)
return err;
/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
// Allocate a socket
sock = sock_alloc();
// Bail out if the allocation failed
if (!sock)
{
if (net_ratelimit())
printk(KERN_WARNING "socket: no more sockets\n");
return -ENFILE; /* Not exactly a match, but its the
closest posix thing */
}
// Record the socket type
sock->type = type;
#if defined(CONFIG_KMOD)
/* Attempt to load a protocol module if the find failed.
*
* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
if (net_families[family] == NULL)
request_module("net-pf-%d", family);
#endif
rcu_read_lock();
// Fetch the protocol-family entry registered for this family
pf = rcu_dereference(net_families[family]);
err = -EAFNOSUPPORT;
// No entry registered for this family: fail with -EAFNOSUPPORT
if (!pf)
goto out_release;
/*
* We will call the ->create function, that possibly is in a loadable
* module, so we have to bump that loadable module refcnt first.
*/
// Take a reference on the module that owns the protocol family
if (!try_module_get(pf->owner))
goto out_release;
/* Now protected by module ref count */
rcu_read_unlock();
// Let the protocol family initialise the socket
err = pf->create(net, sock, protocol);
// Bail out if the family's create callback failed
if (err < 0)
goto out_module_put;
/*
* Now to bump the refcnt of the [loadable] module that owns this
* socket at sock_release time we decrement its refcnt.
*/
// Take a reference on the module that owns the socket's operations
if (!try_module_get(sock->ops->owner))
goto out_module_busy;
/*
* Now that we're done with the ->create function, the [loadable]
* module can have its refcnt decremented
*/
// Drop the reference taken on the protocol family's module
module_put(pf->owner);
err = security_socket_post_create(sock, family, type, protocol, kern);
if (err)
goto out_sock_release;
// Hand the initialised socket back to the caller
*res = sock;
return 0;
out_module_busy:
err = -EAFNOSUPPORT;
out_module_put:
sock->ops = NULL;
module_put(pf->owner);
out_sock_release:
sock_release(sock);
return err;
out_release:
rcu_read_unlock();
goto out_sock_release;
}
security_socket_create,关于security的内容我们都略过,一来减少框架的复杂度,二来我也不知道security主要做的是啥 哈哈 不过可以肯定的是不会妨碍TCP/IP协议栈的正常运行
首先是sock_alloc
sock_alloc在/net/socket.c中
/*
 * Allocate a new inode on the socket mount's superblock and return the
 * struct socket obtained from it via SOCKET_I().
 * Returns NULL if no inode could be allocated.
 */
static struct socket *sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
inode = new_inode(sock_mnt->mnt_sb);
if (!inode)
return NULL;
sock = SOCKET_I(inode);
/* Mark the inode as a socket with rwx for user, group and others, owned by
 * the current task's filesystem uid/gid. */
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
/* Bump the per-CPU sockets_in_use counter. */
get_cpu_var(sockets_in_use)++;
put_cpu_var(sockets_in_use);
return sock;
}
主要是申请一个新的socket,并对他的文件属性进行初始化,socket是属于虚拟文件系统的一部分,我们暂时只要知道这一点就好了
回到__sock_create中,然后到
pf = rcu_dereference(net_families[family]);
net_families的初始化我们也不分析,因为涉及的面太广,为了紧扣PING,我们只需要知道得到了inet_family_ops这个结构就可以了,详细的初始化部分在/net/ipv4/af_inet.c中,大家有兴趣的可以看看
inet_family_ops的结构如下
/*
 * Protocol-family descriptor for PF_INET: __sock_create() calls its
 * .create callback (inet_create) through pf->create().
 */
static struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
};
紧接着我们就到了
err = pf->create(net, sock, protocol);
调用inet_family_ops的create函数
inet_create在/net/ipv4/af_inet.c中
/*
 * PF_INET ->create callback: finds the inet_protosw entry matching
 * (sock->type, protocol) in inetsw[], allocates a struct sock for it and
 * initialises the inet-specific state.
 * Returns 0 on success or a negative errno.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
struct list_head *p;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
char answer_no_check;
int try_loading_module = 0;
int err;
// For socket types other than RAW and DGRAM, call build_ehash_secret()
// if inet_ehash_secret has not been set yet
if (sock->type != SOCK_RAW &&
sock->type != SOCK_DGRAM &&
!inet_ehash_secret)
build_ehash_secret();
// Mark the socket as unconnected
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
// No matching entry found yet
answer = NULL;
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
// Walk the entries registered for this socket type
list_for_each_rcu(p, &inetsw[sock->type])
{
// Get the inet_protosw entry containing this list node
answer = list_entry(p, struct inet_protosw, list);
/* Check the non-wild match. */
// Requested protocol equals the entry's protocol
if (protocol == answer->protocol)
{
// Exact match on a concrete (non-IPPROTO_IP) protocol: stop searching
if (protocol != IPPROTO_IP)
break;
}
else
{
/* Check for the two wild cases. */
// Caller passed IPPROTO_IP (wildcard): adopt this entry's protocol
if (IPPROTO_IP == protocol)
{
// Use the entry's protocol from here on
protocol = answer->protocol;
// Stop searching
break;
}
// The entry itself is a wildcard (IPPROTO_IP): it accepts the requested protocol
if (IPPROTO_IP == answer->protocol)
// Stop searching
break;
}
err = -EPROTONOSUPPORT;
// This entry did not match; forget it
answer = NULL;
}
// No entry matched
if (unlikely(answer == NULL))
{
if (try_loading_module < 2)
{
rcu_read_unlock();
/*
* Be more specific, e.g. net-pf-2-proto-132-type-1
* (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
*/
if (++try_loading_module == 1)
request_module("net-pf-%d-proto-%d-type-%d",
PF_INET, protocol, sock->type);
/*
* Fall back to generic, e.g. net-pf-2-proto-132
* (net-pf-PF_INET-proto-IPPROTO_SCTP)
*/
else
request_module("net-pf-%d-proto-%d",
PF_INET, protocol);
goto lookup_protocol;
}
else
goto out_rcu_unlock;
}
// If the entry requires a capability, the caller must hold it
err = -EPERM;
if (answer->capability > 0 && !capable(answer->capability))
goto out_rcu_unlock;
err = -EAFNOSUPPORT;
if (!inet_netns_ok(net, protocol))
goto out_rcu_unlock;
// Use the matched entry's socket-level operations; copy out the fields needed after the RCU section
sock->ops = answer->ops;
answer_prot = answer->prot;
answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
BUG_TRAP(answer_prot->slab != NULL);
err = -ENOBUFS;
// Allocate the struct sock for this protocol
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
// Bail out if the allocation failed
if (sk == NULL)
goto out;
err = 0;
sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = 1;
// View the sock as an inet_sock
inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
// Raw sockets only
if (SOCK_RAW == sock->type)
{
// Store the protocol number in the local "port" field
inet->num = protocol;
// For IPPROTO_RAW, set hdrincl
if (IPPROTO_RAW == protocol)
inet->hdrincl = 1;
}
if (ipv4_config.no_pmtu_disc)
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
inet->id = 0;
// Initialise the generic sock fields and link sock <-> sk
sock_init_data(sock, sk);
// Install the inet destructor
sk->sk_destruct = inet_sock_destruct;
// Record the address family
sk->sk_family = PF_INET;
// Record the protocol
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
sk_refcnt_debug_inc(sk);
// A local number was assigned above (raw sockets)
if (inet->num)
{
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
// Store the local number, in network byte order, as the source port
inet->sport = htons(inet->num);
/* Add to protocol hash chains. */
sk->sk_prot->hash(sk);
}
// Run the protocol's init callback if it has one
if (sk->sk_prot->init)
{
// Release the sock again if init fails
err = sk->sk_prot->init(sk);
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
inetsw结构的注册不关心,我们看结果
answer就是其中的第二项
这里我们的protocol为IPPROTO_ICMP
answer->protocol为IPPROTO_IP
所以是进入了if (IPPROTO_IP == answer->protocol)后break跳出了循环
之后到inet_netns_ok
inet_netns_ok在/net/ipv4/af_inet.c中
/*
 * Decide whether `protocol` may be used in namespace `net`.
 * Always returns 1 for init_net and for protocols with no inet_protos[]
 * entry; otherwise returns that entry's netns_ok field.
 */
static inline int inet_netns_ok(struct net *net, int protocol)
{
int hash;
struct net_protocol *ipprot;
if (net == &init_net)
return 1;
// Index into inet_protos[]: protocol masked with MAX_INET_PROTOS - 1
hash = protocol & (MAX_INET_PROTOS - 1);
// Fetch the protocol entry stored at that index
ipprot = rcu_dereference(inet_protos[hash]);
// No entry at that index
if (ipprot == NULL)
/* raw IP is OK */
return 1;
return ipprot->netns_ok;
}
由于在__sock_create中我们传入的net类型为init_net,所以这里是返回1,不会goto out_rcu_unlock结束的
继续在inet_create中向下走,来到了sk_alloc
sk_alloc在/net/core/sock.c中
/*
 * Allocate a struct sock for protocol `prot` and set its family, protocol
 * pointers, lock and network namespace.
 * Returns the new sock, or NULL if sk_prot_alloc() failed.
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot)
{
struct sock *sk;
// Allocate the sock; __GFP_ZERO is OR-ed into the caller's allocation flags
sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
// Initialise only if the allocation succeeded
if (sk)
{
// Record the address family
sk->sk_family = family;
/*
* See comment in struct sock definition to understand
* why we need sk_prot_creator -acme
*/
// Record the protocol in both sk_prot and sk_prot_creator
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
sock_net_set(sk, get_net(net));
}
return sk;
}
sk_prot_alloc在协议结构的高速缓存中分配一个sock结构,分配成功后进行一些简单的初始化操作便退出了
继续向下走,到sock_init_data
sock_init_data在/net/core/sock.c中
/*
 * Set the generic fields of `sk` to their initial values and, when `sock`
 * is non-NULL, link the two structures to each other.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
// Initialise the receive queue
skb_queue_head_init(&sk->sk_receive_queue);
// Initialise the write (send) queue
skb_queue_head_init(&sk->sk_write_queue);
// Initialise the error queue
skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
skb_queue_head_init(&sk->sk_async_wait_queue);
#endif
sk->sk_send_head = NULL;
init_timer(&sk->sk_timer);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_state = TCP_CLOSE;
// Point the sock back at its socket
sk->sk_socket = sock;
sock_set_flag(sk, SOCK_ZAPPED);
// A socket was supplied
if (sock)
{
// Copy the socket type
sk->sk_type = sock->type;
// Use the socket's wait queue
sk->sk_sleep = &sock->wait;
// Point the socket at its sock
sock->sk = sk;
}
else
// No socket: no wait queue
sk->sk_sleep = NULL;
rwlock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
af_family_clock_key_strings[sk->sk_family]);
// Default state-change callback
sk->sk_state_change = sock_def_wakeup;
// Default data-ready callback
sk->sk_data_ready = sock_def_readable;
sk->sk_write_space = sock_def_write_space;
// Default error-report callback
sk->sk_error_report = sock_def_error_report;
// Default destructor
sk->sk_destruct = sock_def_destruct;
// No sendmsg page yet
sk->sk_sndmsg_page = NULL;
// Offset within the sendmsg page
sk->sk_sndmsg_off = 0;
sk->sk_peercred.pid = 0;
sk->sk_peercred.uid = -1;
sk->sk_peercred.gid = -1;
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = ktime_set(-1L, 0);
atomic_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
初始化完成后继续inet_create的执行
由于之前设置了inet->num为协议号,这里会执行sk->sk_prot->hash
在进入这个函数之前让我们先来看一下目前sock的结构
此时sk->sk_prot指向了raw_prot,所以sk->sk_prot->hash就是执行了raw_hash_sk
/*
 * Add `sk` to the raw hash table of its protocol (sk->sk_prot->h.raw_hash).
 * The bucket is chosen from the socket's local number (inet->num).
 */
void raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
/* Bucket index: inet->num masked with RAW_HTABLE_SIZE - 1. */
head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
/* Insert the sock and update the in-use count under the table's write lock. */
write_lock_bh(&h->lock);
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
}
主要是把sock挂到raw_prot的哈希表(raw_hash)中与协议号对应的链表上,如下图
回到inet_create,接下来执行sk->sk_prot->init,也就是raw_init
raw_init在/net/ipv4/raw.c中
/*
 * Per-socket init for raw sockets: zeroes the ICMP filter when the socket's
 * local number is IPPROTO_ICMP. Always returns 0.
 */
static int raw_init(struct sock *sk)
{
// View the sock as a raw_sock
struct raw_sock *rp = raw_sk(sk);
// The local number holds the protocol; is it ICMP?
if (inet_sk(sk)->num == IPPROTO_ICMP)
// Clear the filter structure
memset(&rp->filter, 0, sizeof(rp->filter));
return 0;
}
结构图如下
其实这是一早有预谋的,在raw_prot中有一个成员为
.obj_size = sizeof(struct raw_sock)
而在协议中分配空间的时候就已经分配了raw_sock所需要的空间,我们一直在用他的一部分而已
执行最后一步,把初始化好的socket结构映射到一个文件描述符中,并返回这个文件描述符
这样,我们的ping程序的sockfd就拿到了一个按要求初始化好的socket结构索引号了
在之后的sendto和recvfrom操作中就能够使用这个索引号进行发送和接收了
接下来是第2部分,通过socket发送ICMP包
sendto(sockfd,sendpacket,packetsize,0,(struct sockaddr *)&dest_addr,sizeof(dest_addr))
这次我们的目标是case SYS_SENDTO
sys_sendto在/net/socket.c中
/*
 * sendto() system call: wraps the user buffer and optional destination
 * address in a msghdr and passes it to sock_sendmsg().
 * Returns the result of sock_sendmsg(), or a negative errno if the socket
 * lookup or the address copy fails.
 */
asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
unsigned flags, struct sockaddr __user *addr,
int addr_len)
{
struct socket *sock;
char address[MAX_SOCK_ADDR];
int err;
struct msghdr msg;
struct iovec iov;
int fput_needed;
// Look up the socket behind the file descriptor
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
// Start of the data to send
iov.iov_base = buff;
// Length of the data to send
iov.iov_len = len;
msg.msg_name = NULL;
// The message consists of this single iovec
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
// A destination address was supplied
if (addr)
{
// Copy the address from user space into the kernel buffer
err = move_addr_to_kernel(addr, addr_len, address);
if (err < 0)
goto out_put;
// Attach the address
msg.msg_name = address;
// and its length
msg.msg_namelen = addr_len;
}
// A non-blocking file implies MSG_DONTWAIT
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
err = sock_sendmsg(sock, &msg, len);
out_put:
fput_light(sock->file, fput_needed);
out:
return err;
}
初始化好的msg结构如下
在iovec结构中保存了我们要发送数据的首地址和大小
然后进入到sock_sendmsg
sock_sendmsg在/net/socket.c中
/*
 * Send `msg` on `sock` using an on-stack kiocb/sock_iocb pair.
 * If __sock_sendmsg() returns -EIOCBQUEUED, waits for completion with
 * wait_on_sync_kiocb() and returns that result instead.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
init_sync_kiocb(&iocb, NULL);
/* Attach the socket-specific iocb so kiocb_to_siocb() can find it. */
iocb.private = &siocb;
ret = __sock_sendmsg(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
我不大明白kiocb的用处,google也不是说得很清楚,大概就是说关于文件同步操作方面上的,请明白的同学们指教一下 = 3=)/ 感谢 这里就不把kiocb的结构画进来了
然后进入到__sock_sendmsg
__sock_sendmsg在/net/socket.c中
/*
 * Record the socket, message and size in the sock_iocb, run the security
 * hook, then call the socket's ->sendmsg operation.
 * Returns the hook's error if it is non-zero, otherwise the ->sendmsg result.
 */
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
int err;
// Record the socket
si->sock = sock;
si->scm = NULL;
// Record the message
si->msg = msg;
// Record the size of the data to send
si->size = size;
err = security_socket_sendmsg(sock, msg, size);
if (err)
return err;
return sock->ops->sendmsg(iocb, sock, msg, size);
}
连接完成后的结构图如下
这里的sock->ops->sendmsg对应的是inet_sendmsg
/*
 * Socket-level sendmsg for inet sockets: forwards to the protocol's
 * ->sendmsg. If the socket has no local number yet, inet_autobind() is
 * tried first and -EAGAIN is returned when it fails.
 */
int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
{
struct sock *sk = sock->sk;
/* We may need to bind the socket. */
// No local number assigned yet: try to autobind
if (!inet_sk(sk)->num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
我们在之前已经设置了端口号,所以这里直接来到了sk->sk_prot->sendmsg
sk->sk_prot->sendmsg调用的是raw_prot中的sendmsg操作,也就是raw_sendmsg函数
raw_sendmsg在/net/ipv4/raw.c中
/*
 * sendmsg implementation for raw sockets.
 * Validates length, flags and destination address, resolves a route with
 * ip_route_output_flow(), then either sends the caller-built packet
 * (hdrincl) or appends the data with ip_append_data() and pushes it out.
 * Returns `len` on success or a negative errno.
 */
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
int free = 0;
__be32 daddr;
__be32 saddr;
u8 tos;
int err;
err = -EMSGSIZE;
// Reject messages longer than 0xFFFF bytes
if (len > 0xFFFF)
goto out;
/*
* Check the flags.
*/
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */
goto out; /* compatibility */
/*
* Get and verify the address.
*/
// A destination address was supplied with the message
if (msg->msg_namelen)
{
// Interpret the address as a sockaddr_in
struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;
err = -EINVAL;
// The address must be at least as large as a sockaddr_in
if (msg->msg_namelen < sizeof(*usin))
goto out;
// The family should be AF_INET
if (usin->sin_family != AF_INET)
{
static int complained;
if (!complained++)
printk(KERN_INFO "%s forgot to set AF_INET in "
"raw sendmsg. Fix it!\n",
current->comm);
err = -EAFNOSUPPORT;
// A zero family is tolerated; any other non-AF_INET value is an error
if (usin->sin_family)
goto out;
}
// Take the destination IP address
daddr = usin->sin_addr.s_addr;
/* ANK: I did not forget to get protocol from port field.
* I just do not know, who uses this weirdness.
* IP_HDRINCL is much more convenient.
*/
}
else
{
// No address given: only allowed when the socket state is TCP_ESTABLISHED
err = -EDESTADDRREQ;
if (sk->sk_state != TCP_ESTABLISHED)
goto out;
daddr = inet->daddr;
}
ipc.addr = inet->saddr;
ipc.opt = NULL;
ipc.oif = sk->sk_bound_dev_if;
// Control (ancillary) data was supplied
if (msg->msg_controllen)
{
err = ip_cmsg_send(sock_net(sk), msg, &ipc);
if (err)
goto out;
// Options set by ip_cmsg_send are freed at the end of this function
if (ipc.opt)
free = 1;
}
saddr = ipc.addr;
ipc.addr = daddr;
// No options from the control data:
if (!ipc.opt)
// fall back to the socket's own ip_options
ipc.opt = inet->opt;
// IP options are in effect
if (ipc.opt)
{
err = -EINVAL;
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
if (inet->hdrincl)
goto done;
if (ipc.opt->srr)
{
if (!daddr)
goto done;
daddr = ipc.opt->faddr;
}
}
// Derive the type-of-service value from the socket
tos = RT_CONN_FLAGS(sk);
if (msg->msg_flags & MSG_DONTROUTE)
tos |= RTO_ONLINK;
// Multicast destination: default the interface and source from the socket's multicast settings
if (ipv4_is_multicast(daddr))
{
if (!ipc.oif)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
}
// Look up the route
{
struct flowi fl = { .oif = ipc.oif,
.mark = sk->sk_mark,
.nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = saddr,
.tos = tos } },
.proto = inet->hdrincl ? IPPROTO_RAW :
sk->sk_protocol,
};
if (!inet->hdrincl)
{
err = raw_probe_proto_opt(&fl, msg);
if (err)
goto done;
}
security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
}
if (err)
goto done;
// Broadcast routes require the SOCK_BROADCAST flag on the socket
err = -EACCES;
if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
goto done;
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
// hdrincl: hand the data to raw_send_hdrinc() instead of ip_append_data()
if (inet->hdrincl)
{
err = raw_send_hdrinc(sk, msg->msg_iov, len,rt, msg->msg_flags);
}
else
{
if (!ipc.addr)
ipc.addr = rt->rt_dst;
lock_sock(sk);
// Copy the data to be sent into skbs
err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
&ipc, rt, msg->msg_flags);
// Copy failed:
if (err)
// drop the frames pending on this sock
ip_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
// Send the pending frames
err = ip_push_pending_frames(sk);
release_sock(sk);
}
done:
if (free)
kfree(ipc.opt);
ip_rt_put(rt);
out:
if (err < 0)
return err;
return len;
do_confirm:
dst_confirm(&rt->u.dst);
if (!(msg->msg_flags & MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
goto done;
}
这里最关键的就是
err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
这是一个路由表查询函数
无能为力........
不过我根据DEBUG的信息把查询结果画了出来,分别为ipcm_cookie和rtable两个结构,其中最关键的为rtable中的dst_entry
lo网卡的注册在/drivers/net/loopback.c中
由于牵涉到路由表的添加问题,我这里就不介绍他的注册了
转自:http://blog.chinaunix.net/uid-13321460-id-2902439.html
- LINUX下PING与TCP_IP协议栈
- TCP_IP协议分析-协议分层
- ICMP协议与ping
- Linux下利用ICMP协议实现ping命令
- linux网络编程(4)——网络TCP_IP协议族
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ping 原理与ICMP协议
- ICMP协议与ping命令
- 程序员的数学笔记
- 给文件夹添加用户权限后,用户还是不能打开或修改子文件夹?
- 浏览器缓存机制
- 8天玩转并行开发——第三天 plinq的使用
- IOS网络篇19之CloundXNS域名解析
- LINUX下PING与TCP_IP协议栈
- iOS 6 Passbook 入门 2/2
- sqlserver连接错误
- 10大最毒路边小吃盘点,你常吃吗?
- HTML5开源框架和项目推荐
- 三星半导体官网
- requestDisallowInterceptTouchEvent解决子View和父View滑动的冲突
- 8天玩转并行开发——第四天 同步机制(上)
- 转:Socket.ConnectAsync 方法 (SocketAsyncEventArgs)