• linux内核网络收包过程(一)


    目录

    网络收包过程

    创建ksoftirqd内核线程

    网络子系统初始化

    协议栈注册

    网卡驱动初始化

    启动网卡


    网络收包过程

    1,数据帧从外部网络到达网卡

    2,网卡把帧使用DMA方式搬到内存

    3,硬中断通知CPU

    4,CPU响应硬中断,通知软中断处理数据

    5,ksoftirqd内核进程处理软中断,调用网卡驱动注册的poll函数开始收包

    6,帧从Ring buffer上取出

    7,协议层处理网络帧将数据data放到socket接收队列中


    创建ksoftirqd内核线程

    软中断在内核线程ksoftirqd中进行 [kernel/softirq.c]

    1. static struct smp_hotplug_thread softirq_threads = {
    2. .store = &ksoftirqd,
    3. .thread_should_run = ksoftirqd_should_run,
    4. .thread_fn = run_ksoftirqd,
    5. .thread_comm = "ksoftirqd/%u",
    6. };
    7. static __init int spawn_ksoftirqd(void)
    8. {
    9. cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
    10. takeover_tasklets);
    11. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
    12. return 0;
    13. }
    14. early_initcall(spawn_ksoftirqd);

    ksoftirqd被创建出来以后,内核线程循环调用ksoftirqd_should_run判断有没有软中断需要被处理,若有则调用run_ksoftirqd进行处理。


    网络子系统初始化

    1. static int __init net_dev_init(void)
    2. {
    3. for_each_possible_cpu(i) {
    4. struct work_struct *flush = per_cpu_ptr(&flush_works, i);
    5. struct softnet_data *sd = &per_cpu(softnet_data, i);
    6. skb_queue_head_init(&sd->input_pkt_queue);
    7. skb_queue_head_init(&sd->process_queue);
    8. }
    9. ...
    10. open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    11. open_softirq(NET_RX_SOFTIRQ, net_rx_action);
    12. }
    13. subsys_initcall(net_dev_init);

    open_softirq注册软中断NET_TX_SOFTIRQ发送与NET_RX_SOFTIRQ接收。

    1. void open_softirq(int nr, void (*action)(struct softirq_action *))
    2. {
    3. softirq_vec[nr].action = action;
    4. }

    softirq_vec是软中断向量表,以软中断枚举值为下标,注册相应的处理函数。


    协议栈注册

    网络层ip协议,传输层tcp/udp协议

    1. static struct packet_type ip_packet_type __read_mostly = {
    2. .type = cpu_to_be16(ETH_P_IP),
    3. .func = ip_rcv,
    4. .list_func = ip_list_rcv,
    5. };
    6. static struct net_protocol tcp_protocol = {
    7. .early_demux = tcp_v4_early_demux,
    8. .early_demux_handler = tcp_v4_early_demux,
    9. .handler = tcp_v4_rcv,
    10. .err_handler = tcp_v4_err,
    11. .no_policy = 1,
    12. .netns_ok = 1,
    13. .icmp_strict_tag_validation = 1,
    14. };
    15. static struct net_protocol udp_protocol = {
    16. .early_demux = udp_v4_early_demux,
    17. .early_demux_handler = udp_v4_early_demux,
    18. .handler = udp_rcv,
    19. .err_handler = udp_err,
    20. .no_policy = 1,
    21. .netns_ok = 1,
    22. };
    23. static int __init inet_init(void)
    24. {
    25. //注册tcp协议操作集tcp_prot
    26. rc = proto_register(&tcp_prot, 1);
    27. //注册udp协议操作集udp_prot
    28. rc = proto_register(&udp_prot, 1);
    29. ...
    30. //注册udp协议
    31. if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
    32. pr_crit("%s: Cannot add UDP protocol\n", __func__);
    33. //注册tcp协议
    34. if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
    35. pr_crit("%s: Cannot add TCP protocol\n", __func__);
    36. dev_add_pack(&ip_packet_type);
    37. }

     先看一下inet_add_protocol

    1. struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
    2. inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
    3. {
    4. ...
    5. return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
    6. NULL, prot) ? 0 : -1;
    7. }

    inet_add_protocol将tcp,udp相应的函数注册到inet_protos数组中。即inet_protos 记录着 udp,tcp 的处理函数地址

    再看一下dev_add_pack

    1. void dev_add_pack(struct packet_type *pt)
    2. {
    3. struct list_head *head = ptype_head(pt);
    4. ...
    5. }
    6. static inline struct list_head *ptype_head(const struct packet_type *pt)
    7. {
    8. if (pt->type == htons(ETH_P_ALL))
    9. return pt->dev ? &pt->dev->ptype_all : &ptype_all;
    10. else
    11. return pt->dev ? &pt->dev->ptype_specific :
    12. &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
    13. }
    ptype_base 存储着ip_rcv() 函数的处理地址。

    网卡驱动初始化

    驱动程序会使用 module_init 向内核注册一个初始化函数,以igb网卡驱动为例

    1. static struct pci_driver igb_driver = {
    2. .name = igb_driver_name,
    3. .id_table = igb_pci_tbl,
    4. .probe = igb_probe,
    5. .remove = igb_remove,
    6. ...
    7. };
    8. static int __init igb_init_module(void)
    9. {
    10. int ret;
    11. ret = pci_register_driver(&igb_driver);
    12. return ret;
    13. }
    14. module_init(igb_init_module);
    当网卡设备被识别后,内核会调用其驱动probe方法(igb_probe)
    1. static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    2. {
    3. //DMA初始化
    4. err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
    5. //注册netdev_ops与ethtool操作函数
    6. netdev->netdev_ops = &igb_netdev_ops;
    7. igb_set_ethtool_ops(netdev);
    8. err = igb_sw_init(adapter);
    9. //注册netdev
    10. err = register_netdev(netdev);
    11. dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
    12. }

    igb_sw_init-> igb_init_interrupt_scheme->igb_alloc_q_vectors->igb_alloc_q_vector

    1. static int igb_alloc_q_vector(struct igb_adapter *adapter,
    2. int v_count, int v_idx,
    3. int txr_count, int txr_idx,
    4. int rxr_count, int rxr_idx)
    5. {
    6. netif_napi_add(adapter->netdev, &q_vector->napi,
    7. igb_poll, 64);
    8. ...
    9. }

    NAPI 机制所必须的 poll 函数,对于 igb 网卡驱动来说是 igb_poll。

    启动网卡

    当启用一个网卡时(例如通过 ifconfig eth0 up ),net_device_ops 中的 igb_open 方法会被调用
    1. 启动网卡
    2. 分配RX TX队列内存
    3. 调用net_device_ops中注册open等函数
    4. 注册中断处理函数
    5. 开硬中断,等待数据包
    1. static int __igb_open(struct net_device *netdev, bool resuming)
    2. {
    3. /* allocate transmit descriptors */
    4. err = igb_setup_all_tx_resources(adapter);
    5. /* allocate receive descriptors */
    6. err = igb_setup_all_rx_resources(adapter);
    7. //处理中断
    8. err = igb_request_irq(adapter);
    9. //使能napi
    10. for (i = 0; i < adapter->num_q_vectors; i++)
    11. napi_enable(&(adapter->q_vector[i]->napi));
    12. ...
    13. return 0;
    14. }

    中断处理igb_request_irq

    1. static int igb_request_irq(struct igb_adapter *adapter)
    2. {
    3. struct net_device *netdev = adapter->netdev;
    4. struct pci_dev *pdev = adapter->pdev;
    5. int err = 0;
    6. if (adapter->flags & IGB_FLAG_HAS_MSIX) {
    7. err = igb_request_msix(adapter);
    8. ...
    9. }
    10. ...
    11. }
    12. static int igb_request_msix(struct igb_adapter *adapter)
    13. {
    14. for (i = 0; i < adapter->num_q_vectors; i++) {
    15. struct igb_q_vector *q_vector = adapter->q_vector[i];
    16. vector++;
    17. err = request_irq(adapter->msix_entries[vector].vector,
    18. igb_msix_ring, 0, q_vector->name,
    19. q_vector);
    20. }
    21. igb_configure_msix(adapter);
    22. return 0;
    23. ...
    24. }
    igb_request_msix 中对于多队列的网卡,为每一个队列都注册了中断,其对应的中断处理函数为 igb_msix_ring。
    参考

  • 相关阅读:
    通用树形结构的迭代与组合模式实现方案
    排序算法大总结
    基于SSM的传统文化网站
    crontab 实现秒级定时任务的执行(学习笔记)
    如何创建像 Quora 这样的问答网站:技术堆栈、用户获取等
    nginx中deny和allow详解
    基于5G边缘网关的储能在线监测方案
    SpringBoot 如何优雅的进行全局异常处理?
    一站式DevOps真的能提速增效吗?TVP吐槽大会邀您来验证
    22多校5 - Don‘t Starve(DP,依靠边更新端点)
  • 原文地址:https://blog.csdn.net/WANGYONGZIXUE/article/details/124898206