inet_init
是 Linux 内核中用于初始化 TCP/IP 协议栈的函数。它在内核启动时被调用,完成各种协议和数据结构的注册和初始化。
主要功能:
- 注册 TCP、UDP、ICMP 等协议。
- 初始化 ARP、IP 和其他网络协议模块。
- 设置 socket 操作和协议处理。
前后调用关系链:
start_kernel()
└── rest_init()
    └── kernel_init()
        └── do_basic_setup()
            └── do_initcalls()
                └── inet_init()
                    ├── proto_register(&tcp_prot, 1)
                    ├── proto_register(&udp_prot, 1)
                    ├── proto_register(&raw_prot, 1)
                    ├── proto_register(&ping_prot, 1)
                    ├── sock_register(&inet_family_ops)
                    ├── inet_add_protocol(&icmp_protocol, IPPROTO_ICMP)
                    ├── inet_add_protocol(&udp_protocol, IPPROTO_UDP)
                    ├── inet_add_protocol(&tcp_protocol, IPPROTO_TCP)
                    ├── arp_init()
                    ├── ip_init()
                    ├── tcp_v4_init()
                    ├── udp_init()
                    ├── ping_init()
                    ├── icmp_init()
                    └── init_ipv4_mibs()
再来看源代码:
/*
 * inet_init - boot-time initialisation of the IPv4 protocol stack.
 *
 * Allocates the reserved-ports area, registers the TCP, UDP, RAW and PING
 * protos, registers the inet socket family, adds the base protocol handlers
 * (ICMP, UDP, TCP and, if configured, IGMP), populates the inetsw lists and
 * then initialises the individual sub-modules before finally registering
 * the IP packet type with dev_add_pack().
 *
 * Returns 0 on success. Returns -EINVAL if the reserved-ports allocation
 * fails, or the error code from the first failing proto_register() call;
 * in the latter case everything registered so far is unwound.
 *
 * Failures of inet_add_protocol(), ip_mr_init() and init_ipv4_mibs() are
 * only logged; a failure of icmp_init() panics.
 */
static int __init inet_init(void)
{
	struct sk_buff *dummy_skb;	/* only used inside sizeof() below */
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	/* Compile-time check: inet_skb_parm must fit into the skb cb[] area. */
	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));

	/* 65536 / 8 bytes, i.e. one bit per possible port number. */
	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
	if (!sysctl_local_reserved_ports)
		goto out;

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out_free_reserved_ports;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	/* The return value is deliberately discarded. */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	ping_init();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif

	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");

	ipv4_proc_init();

	ipfrag_init();

	/* Hook the IPv4 receive handler into the packet-type lists. */
	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;

	/* Error unwinding: undo the registrations in reverse order. */
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
out_free_reserved_ports:
	kfree(sysctl_local_reserved_ports);
	goto out;
}

fs_initcall(inet_init);
调用路径
- start_kernel():内核的入口函数,位于 init/main.c 中,完成内核的基本初始化工作。
- rest_init():在 start_kernel() 中被调用,创建内核初始化线程 kernel_init。
- kernel_init():内核初始化线程的主函数,负责内核的后续初始化工作。
- do_basic_setup():在 kernel_init() 中被调用,执行所有的初始化调用(initcall)。
- do_initcalls():在 do_basic_setup() 中被调用,遍历所有的 initcall 函数,并依次执行它们。
- inet_init():通过 fs_initcall(inet_init) 注册为 initcall 函数,由 do_initcalls() 调用,用于初始化 TCP/IP 协议栈。
- proto_register():用于注册不同的协议(TCP、UDP、RAW、PING),将它们添加到协议列表中,以便后续处理。
- sock_register():注册 socket 操作,包括创建和管理 socket 的方法。
- inet_add_protocol():将各个传输层协议(如 ICMP、UDP、TCP)添加到网络层,以便接收和处理数据包。
- 模块初始化函数(如 arp_init()、ip_init()、tcp_v4_init() 等):这些函数负责初始化各个网络模块,为后续的数据传输做好准备。
再看 inet_init() 最后一行调用的处理函数 dev_add_pack():
/**
 *	dev_add_pack - register a packet handler
 *	@pt: packet type declaration
 *
 *	Links the given &packet_type into the list selected by ptype_head()
 *	so the networking stack can find it. The caller must keep @pt
 *	allocated until it has been removed from the kernel lists again.
 *
 *	This function never sleeps. As a consequence it cannot guarantee
 *	that CPUs which are in the middle of receiving packets see the new
 *	packet type right away; they will see it from the next received
 *	packet on.
 */
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *chain;

	/* Choose the target list first, outside the locked region. */
	chain = ptype_head(pt);

	/* ptype_lock is held around the RCU list insertion. */
	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, chain);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
dev_add_pack
是 Linux 内核中用于添加数据包处理程序的函数。它将一个协议处理程序(packet handler)注册到网络栈中,以便在接收到特定类型的数据包时能够正确处理这些数据包。
具体来说:
- 添加协议处理程序:将传入的 packet_type 结构体链接到内核的链表中,允许内核在接收到相应类型的数据包时调用相应的处理程序。packet_type 结构体的定义如下:
struct packet_type {
	__be16 type; /* This is really htons(ether_type). */
	struct net_device *dev; /* NULL is wildcarded here */
	int (*func) (struct sk_buff *,
		     struct net_device *,
		     struct packet_type *,
		     struct net_device *);
	struct sk_buff *(*gso_segment)(struct sk_buff *skb,
				       u32 features);
	int (*gso_send_check)(struct sk_buff *skb);
	struct sk_buff **(*gro_receive)(struct sk_buff **head,
					struct sk_buff *skb);
	int (*gro_complete)(struct sk_buff *skb);
	void *af_packet_priv;
	struct list_head list;
};
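- 非阻塞操作:该函数在执行过程中不会睡眠。按照源码注释的说法,正因为它不会睡眠(不会等待其他 CPU),所以无法保证正在接收数据包的 CPU 立即看到新注册的 packet_type,这些 CPU 要到处理下一个数据包时才会看到它。需要注意的是,函数内部使用的是普通的 spin_lock(&ptype_lock),并没有关闭中断或软中断,因此不能仅凭"不睡眠"就认为它可以在中断上下文中安全调用。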
这么做有什么好处呢?
- 网络协议处理:当网络设备接收到数据包时,内核会检查数据包类型,并调用相应的处理程序。通过 dev_add_pack() 注册的数据包处理程序会在接收到匹配的数据包时被触发。
- 动态协议支持:可以在运行时动态地添加新的协议处理程序,而不需要重启内核或修改内核代码。这使得内核能够灵活地支持多种网络协议和功能。
那么,它如何根据数据包的类型来找到对应的链表头呢?
/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/*
 * ptype_head - select the list a packet_type belongs on.
 * @pt: packet type whose ->type field (network byte order) is inspected
 *
 * Returns &ptype_all when @pt->type equals htons(ETH_P_ALL). Otherwise
 * returns the ptype_base[] entry indexed by the host-order type value
 * masked with PTYPE_HASH_MASK.
 */
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}