SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
来源:互联网 发布:淘宝男装店名起名大全 编辑:程序博客网 时间:2024/05/18 01:54
[bind]
[bind->sockfd_lookup_light]
[bind->sockfd_lookup_light->sock_from_file]
[bind]
[bind->move_addr_to_kernel]
[bind]
[bind->inet_bind]
[bind->inet_bind->__inet_dev_addr_type]
[bind->inet_bind]
[bind->inet_bind->inet_csk_get_port]
[bind->inet_bind->inet_csk_get_port->inet_get_local_port_range]
[bind->inet_bind->inet_csk_get_port]
[bind->inet_bind->inet_csk_get_port->inet_is_reserved_local_port]
[bind->inet_bind->inet_csk_get_port]
[bind->inet_bind->inet_csk_get_port->bind_conflict]
[bind->inet_bind->inet_csk_get_port]
[bind->inet_bind->inet_csk_get_port->inet_bind_hash]
[bind->inet_bind]
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen){struct socket *sock;int err, fput_needed;sock = sockfd_lookup_light(fd, &err, &fput_needed);从文件fd中得到对应的socket:
[bind->sockfd_lookup_light]
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed){struct fd f = fdget(fd);struct socket *sock;*err = -EBADF;if (f.file) {sock = sock_from_file(f.file, err);if (likely(sock)) {*fput_needed = f.flags;return sock;}fdput(f);}return NULL;}调用fdget先从进程的文件列表中通过fd找到对应的文件,然后从中得到socket,文件的flag通过fput_needed返回:
[bind->sockfd_lookup_light->sock_from_file]
struct socket *sock_from_file(struct file *file, int *err){if (file->f_op == &socket_file_ops)return file->private_data;/* set in sock_map_fd */*err = -ENOTSOCK;return NULL;}在初始化文件时,file->private_data指向新建的socket
[bind]
if (sock) {err = move_addr_to_kernel(umyaddr, addrlen, &address);将IP地址从用户空间copy到内核空间:
[bind->move_addr_to_kernel]
/** *move_addr_to_kernel-copy a socket address into kernel space *@uaddr: Address in user space *@kaddr: Address in kernel space *@ulen: Length in user space * *The address is copied into kernel space. If the provided address is *too long an error code of -EINVAL is returned. If the copy gives *invalid addresses -EFAULT is returned. On a success 0 is returned. */int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr){if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))return -EINVAL;if (ulen == 0)return 0;if (copy_from_user(kaddr, uaddr, ulen))return -EFAULT; return audit_sockaddr(ulen, kaddr);}对地址进行一些检测,将地址copy到内核空间。audit_sockaddr对地址进一步验证。
[bind]
if (err >= 0) {err = sock->ops->bind(sock, (struct sockaddr *)&address, addrlen);}} return err;}对TCP,这里的ops为inet_stream_ops,调用的是inet_bind。
[bind->inet_bind]
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len){struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;struct net *net = sock_net(sk);int chk_addr_ret;if (addr_len < sizeof(struct sockaddr_in))goto out; chk_addr_ret = __inet_dev_addr_type(net, NULL, addr->sin_addr.s_addr);先对地址的长度检查,然后得到地址的类型:
[bind->inet_bind->__inet_dev_addr_type]
/* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. */static inline unsigned int __inet_dev_addr_type(struct net *net,const struct net_device *dev,__be32 addr){if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))return RTN_BROADCAST;if (ipv4_is_multicast(addr))return RTN_MULTICAST;如果地址(addr & htonl(0xff000000)) == htonl(0x00000000)或者addr == htonl(0xffffffff),类型为广播。如果(addr & htonl(0xf0000000)) == htonl(0xe0000000)类型为多播。
struct flowi4fl4 = { .daddr = addr };struct fib_resultres;unsigned int ret = RTN_BROADCAST;struct fib_table *local_table;local_table = fib_get_table(net, RT_TABLE_LOCAL);if (local_table) {ret = RTN_UNICAST;if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {if (!dev || dev == res.fi->fib_dev)ret = res.type;}}return ret;}如果在本地路由表中查找到,类型为路由表中的值。如果不存在本地路由表,类型为广播。否则类型为单播。
[bind->inet_bind]
snum = ntohs(addr->sin_port);if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))goto out; /* Check these errors (active socket, double bind). */if (sk->sk_state != TCP_CLOSE || inet->inet_num)goto out_release_sock;inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)inet->inet_saddr = 0; /* Use device *//* Make sure we are allowed to bind here. */if (sk->sk_prot->get_port(sk, snum)) {inet->inet_saddr = inet->inet_rcv_saddr = 0;err = -EADDRINUSE;goto out_release_sock;}得到端口号并做权限检测:端口号不为0且小于PROT_SOCK时,需要具有CAP_NET_BIND_SERVICE权限,否则出错。绑定时socket如果不处于关闭状态或是本地端口不为0(已绑定),出错。根据上面的地址类型设置地址:地址类型为多播或广播时,inet_saddr置为0。然后调用get_port,对TCP,调用inet_csk_get_port确定此端口是否可以被绑定;失败时把地址清0并返回-EADDRINUSE:
[bind->inet_bind->inet_csk_get_port]
/* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. */int inet_csk_get_port(struct sock *sk, unsigned short snum){struct net *net = sock_net(sk);if (!snum) {int low, high;again:inet_get_local_port_range(net, &low, &high);当端口号为0时,先得到本地端口号的范围
[bind->inet_bind->inet_csk_get_port->inet_get_local_port_range]
void inet_get_local_port_range(struct net *net, int *low, int *high){*low = net->ipv4.sysctl_local_ports.range[0];*high = net->ipv4.sysctl_local_ports.range[1];}net->ipv4.sysctl_local_ports在初始化sysctl模块时被设置,在函数ipv4_sysctl_init_net中,范围为:[ 32768, 61000 ]
[bind->inet_bind->inet_csk_get_port]
int smallest_size = -1;if (!snum) { int remaining, rover;again: ... remaining = (high - low) + 1;rover = prandom_u32() % remaining + low; smallest_size = -1; do { if (inet_is_reserved_local_port(rover)) goto next_nolock;先在本地端口范围[low, high]内计算出一个随机的起始端口号rover,然后判断它是不是保留端口,如果是保留端口就跳过它,继续检查下一个端口:
[bind->inet_bind->inet_csk_get_port->inet_is_reserved_local_port]
extern unsigned long *sysctl_local_reserved_ports;static inline int inet_is_reserved_local_port(int port){return test_bit(port, sysctl_local_reserved_ports);}sysctl_local_reserved_ports在inet_init中被初始化,是一个8192字节的内存块,作为位图使用(共65536位,每个端口号对应一位),用来标记保留的端口号,而不是已经被绑定的端口号。自动选择端口时会跳过这些保留端口。
[bind->inet_bind->inet_csk_get_port]
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;struct inet_bind_hashbucket *head;struct inet_bind_bucket *tb;int smallest_size = -1, smallest_rover;kuid_t uid = sock_i_uid(sk);if (!snum) {int remaining, rover, low, high;again:...smallest_size = -1;do {...head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)];inet_bind_bucket_for_each(tb, &head->chain)if (net_eq(ib_net(tb), net) && tb->port == rover) {/* 列表项的fastreuse被设置,sk的sk_reuse被设置且状态不是TCP_LISTEN * 或者列表项的fastreuseport大于0并且sk的sk_reuseport不为0并且两者的用户ID相同 * 上面满足后,找到列表项的num_owners最小的一个 */if ( ( ( tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN ) || (tb->fastreuseport > 0 && sk->sk_reuseport && uid_eq(tb->fastuid, uid)) ) && ( tb->num_owners < smallest_size || smallest_size == -1) ) {smallest_size = tb->num_owners;smallest_rover = rover; /* 列表的bsockets值大于本地端口的范围 * 在tb中无冲突,找到位置 */ if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {snum = smallest_rover; goto tb_found; } } if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { snum = rover; goto tb_found; } goto next; } break; next: next_nolock: if (++rover > high) rover = low; } while (--remaining > 0)所有被绑定的端口都通过一个结构(inet_bind_bucket)放在哈希表的链表中。对此链表进行循环,如果tb与当前socket在同一个网络命名空间并且端口号相同,进一步判断;如果链表中没有这个端口号对应的tb,说明该端口还没有被使用,通过break退出循环。当tb->fastreuse大于0时,表示当中的socket的sk_reuse不为0且状态不是TCP_LISTEN;当tb->fastreuseport大于0时表示当中的socket的sk_reuseport不为0。在可以复用的tb中,用smallest_size和smallest_rover记录num_owners最小的端口。如果已绑定的socket总数(hashinfo->bsockets)超过了本地端口范围的大小(high - low + 1),并且在tb中没有冲突,就直接使用smallest_rover记录的端口号。TCP时,icsk_af_ops为ipv4_specific,调用的接口为inet_csk_bind_conflict。tb的owners列表中挂着绑定到该端口的socket,bind_conflict会对这个列表中的元素逐个进行检查。remaining初始化为(high - low) + 1,也就是说这样的检查最多把整个本地端口范围遍历一遍。
[bind->inet_bind->inet_csk_get_port->bind_conflict]
int inet_csk_bind_conflict(const struct sock *sk,const struct inet_bind_bucket *tb, bool relax){struct sock *sk2;int reuse = sk->sk_reuse;int reuseport = sk->sk_reuseport;kuid_t uid = sock_i_uid((struct sock *)sk);/* * Unlike other sk lookup places we do not check * for sk_net here, since _all_ the socks listed * in tb->owners list belong to the same net - the * one this bucket belongs to. */sk_for_each_bound(sk2, &tb->owners) {/* socket不同并且 * 两个socket的sk_bound_dev_if相同且不为0 */if ( sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {/* 两者的sk_reuse有一个不为0或列表项的状态为TCP_LISTEN;并且 * 两者的sk_reuseport 有一个不为0;或列表项的状态不是TCP_TIME_WAIT并且两者UID不同 */ if ( (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) && (!reuseport || !sk2->sk_reuseport || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) { /* 两者的sk_rcv_saddr 相同且不为0 */ if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || sk2->sk_rcv_saddr == sk->sk_rcv_saddr)break; } /* 两者的sk_reuse都不为0并且列表项的状态不是TCP_LISTEN */if ( !relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { /* 两者的sk_rcv_saddr 相同且不为0 */ if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || sk2->sk_rcv_saddr == sk->sk_rcv_saddr)break; }} }/* 找到的列表项不为0,有冲突 */return sk2 != NULL; }列表中的socket(sk2)与传进来的socket(sk)同时满足下面的条件时,认为有冲突:(1)两者不是同一个socket;(2)两者绑定在同一个设备上,或者至少有一方没有绑定设备(sk_bound_dev_if为0);(3)两者的接收地址相同,或者至少有一方的接收地址为0(通配地址);(4)下面两种情况之一成立:a. 两者的sk_reuse有一个为0或sk2的状态是TCP_LISTEN,并且两者的sk_reuseport有一个为0,或sk2的状态不是TCP_TIME_WAIT且两者UID不同;b. relax为false,两者的sk_reuse都不为0且sk2的状态不是TCP_LISTEN。注意代码中设备和地址的判断是"任意一方为0或两者相等",而不是"相同且不为0"。
[bind->inet_bind->inet_csk_get_port]
/* Exhausted local port range during search? It is not * possible for us to be holding one of the bind hash * locks if this test triggers, because if 'remaining' * drops to zero, we broke out of the do/while loop at * the top level, not from the 'break;' statement. */ret = 1;if (remaining <= 0) {if (smallest_size != -1) {snum = smallest_rover;goto have_snum;}goto fail;}/* OK, here is the one we will use. HEAD is * non-NULL and we hold it's mutex. */snum = rover;如果remaining大于0,说明循环是通过break退出的,找到了一个还没有被使用的端口。如果remaining已经减到0,并且smallest_size不为-1,就将端口号设为刚才记录的num_owners最小的端口号,然后进一步检查,否则就出错了。下面就是当snum不为0的情况:
} else {have_snum:head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)];inet_bind_bucket_for_each(tb, &head->chain)if (net_eq(ib_net(tb), net) && tb->port == snum)goto tb_found;}tb = NULL;goto tb_not_found;这时比较简单,只要在哈希链表中查找同一网络命名空间下端口号相同的tb就可以了;如果找不到,tb置为NULL,跳到tb_not_found。
tb_found:if (!hlist_empty(&tb->owners)) {if (sk->sk_reuse == SK_FORCE_REUSE)goto success;/* tb的fastreuse大于0并且sk的sk_reuse不为0且sk的状态不为TCP_LISTEN * 或tb的fastreuseport大于0并且sk的sk_reuseport不为0且tb的fastuid和sk的UID相同 */if ( ( (tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN) || (tb->fastreuseport > 0 && sk->sk_reuseport && uid_eq(tb->fastuid, uid)) ) && smallest_size == -1) {goto success;} else {ret = 1;if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {/* sk的sk_reuse不为0且状态不为TCP_LISTEN;或tb的fastreuseport大于0并且sk的sk_reuseport不为0且tb的fastuid和sk的UID相同 */if ( ( (sk->sk_reuse && sk->sk_state != TCP_LISTEN) || (tb->fastreuseport > 0 && sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) {spin_unlock(&head->lock);goto again;}goto fail_unlock;}}}如果tb中的列表为空,说明还是没找到。如果sk的sk_reuse值为SK_FORCE_REUSE,成功了。attempts为5,当不成功时会尝试5次
tb_not_found:ret = 1;if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, net, head, snum)) == NULL)goto fail_unlock;if (hlist_empty(&tb->owners)) {if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)tb->fastreuse = 1;elsetb->fastreuse = 0;if (sk->sk_reuseport) {tb->fastreuseport = 1;tb->fastuid = uid;} elsetb->fastreuseport = 0;} else {if (tb->fastreuse && (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))tb->fastreuse = 0;if (tb->fastreuseport && (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))tb->fastreuseport = 0;}当tb为NULL时,新建一个tb当作节点,创建失败则出错返回。如果tb的owners列表为空:当sk的状态不是TCP_LISTEN且sk_reuse不为0时,tb的fastreuse为1,否则为0;当sk的sk_reuseport不为0时,tb的fastreuseport为1且fastuid设为sk的UID,否则fastreuseport为0。如果tb的owners列表不为空,则根据sk的值重设fastreuse和fastreuseport:只要sk不满足相应的条件,就把对应的值清0。
success:if (!inet_csk(sk)->icsk_bind_hash)inet_bind_hash(sk, tb, snum);WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);ret = 0;fail_unlock:spin_unlock(&head->lock);fail:local_bh_enable();return ret;}最后将tb和端口号加入到列表中。
[bind->inet_bind->inet_csk_get_port->inet_bind_hash]
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum){ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; atomic_inc(&hashinfo->bsockets);inet_sk(sk)->inet_num = snum; hlist_add_head(&sk->sk_bind_node, &tb->owners);tb->num_owners++; inet_csk(sk)->icsk_bind_hash = tb;}设置socket的本地端口号为snum,sk通过sk_bind_node,加入到tb->owners中
[bind->inet_bind]
if (inet->inet_rcv_saddr)sk->sk_userlocks |= SOCK_BINDADDR_LOCK;if (snum)sk->sk_userlocks |= SOCK_BINDPORT_LOCK;inet->inet_sport = htons(inet->inet_num);inet->inet_daddr = 0;inet->inet_dport = 0;sk_dst_reset(sk);err = 0;out_release_sock:release_sock(sk);out:return err;}如果绑定了地址或端口,在sk_userlocks中设置相应的标志。设置源端口号inet_sport为本地端口号inet_num(转换为网络字节序),目的地址和目的端口清0,并重置sk中的路由。执行到这里说明绑定成功,err置为0并返回。如果前面的get_port返回非0(端口已被绑定并且存在冲突),则返回-EADDRINUSE,出错。
0 0
- SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
- SYSCALL_DEFINE2(listen, int, fd, int, backlog)
- recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen);/函数简单了解
- bind(lisnfd,(struct sockaddr *)&addr,sizeof(addr));
- int set_opt(int fd,int nSpeed, int nBits, char nEvent, int nStop)
- struct sockaddr
- struct sockaddr {
- int tty_register_driver(struct tty_driver *driver)
- int platform_driver_register(struct platform_driver *drv)
- int driver_register(struct device_driver *drv)
- int s3c24xx_serial_probe(struct platform_device *dev,
- int uart_register_driver(struct uart_driver *drv)
- int device_register(struct device *dev)
- int
- int?
- int ?
- int
- Int
- 【我们都爱Paul Hegarty】斯坦福IOS8公开课个人笔记25 Lifecycle生命周期
- 《Effective C++》重点摘要(四)
- List中toArray()的使用方法
- Failure [INSTALL_FAILED_OLDER_SDK]报错
- Lua中强大的元方法__index详解
- SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
- 算法的时间复杂度和空间复杂度
- Chrome 42禁用NPAPI和相关插件:Java、Unity和Silverlight
- 关于python2.7.6+django1.4.2的admin样式无效问题记录
- 常见IO错误码和Errno错误码笔记
- 素数测试
- 转动的饼图
- 【Java】利用ant插件压缩文件夹及其所有子文件与子文件夹
- 学习笔记--合并流sequenceInputStream