Linux sch_fq公平队列FQ流分类与credit机制

发布时间:2026/6/23 0:41:11

Linux sch_fq公平队列（FQ）流分类与credit机制

Fair Queue（FQ）qdisc位于net/sched/sch_fq.c，核心目标是每个流（flow）对应一个FIFO队列，按轮询（DRR, Deficit Round Robin）方式调度，保证各流间的公平性，同时支持pacing和per-flow限速。FQ的流分类基于sk_buff的hash值，默认使用内核计算的skb->hash。fq_classify函数将报文分配到对应流：

static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	struct fq_flow *f;
	u32 hash;

	if (skb->protocol == htons(ETH_P_IP) ||
	    skb->protocol == htons(ETH_P_IPV6)) {
		hash = skb_get_hash_perturb(skb, q->perturbation);
	} else {
		hash = skb->hash;
		if (!hash)
			hash = (u32)(unsigned long)skb_dst(skb) ^
			       skb->sk_hash;
	}
	hash = reciprocal_scale(hash, FQ_HASH_SIZE);
	f = &q->flows[hash];
	if (f->flowchain.prev == NULL) {
		struct fq_flow *new_flow;

		new_flow = fq_find_fitting_flow(q, skb, hash);
		if (new_flow)
			return new_flow;
		if (f->qlen > 1 || f->stats.stoll > q->flow_refill)
			return &q->internal;
		f->fq_tin = skb_find_txq(skb, q->flow_max_rate ?
					 &q->rate_limiting_struct : NULL);
	}
	return f;
}

每个flow通过定时器进行pacing控制。fq_dequeue是核心调度函数：

static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	struct fq_flow *f;
	struct sk_buff *skb;
	u32 now = ktime_get_ns();
	s64 credit;

	f = list_first_entry_or_null(&q->new_flows, struct fq_flow,
				     flowchain);
	if (!f) {
		f = list_first_entry_or_null(&q->old_flows, struct fq_flow,
					     flowchain);
		if (!f)
			return NULL;
	}
	if (f->time_next_packet > now) {
		if (!q->timer_active) {
			q->timer_active = true;
			hrtimer_start(&q->fq_timer,
				      ns_to_ktime(f->time_next_packet - now),
				      HRTIMER_MODE_REL_PINNED);
		}
		return NULL;
	}
	credit = f->credit;
	skb = f->head;
	if (skb) {
		credit -= skb->len;
		if (credit < 0 && !q->rate_enable) {
			return NULL;
		}
		__skb_unlink(skb, &f->queue);
		sch->q.qlen--;
		f->credit = credit;
		if (f->credit <= 0) {
			f->credit += q->quantum;
			list_move_tail(&f->flowchain, &q->old_flows);
		}
	}
	if (f->credit > 0 && f->qlen > 0)
		list_move_tail(&f->flowchain, &q->new_flows);
	else if (f->qlen == 0)
		list_del_init(&f->flowchain);
	if (q->rate_enable) {
		f->time_next_packet = now + max_t(u64, q->flow_max_rate ?
			div64_u64(skb->len * 1000ULL * NSEC_PER_USEC,
				  q->flow_max_rate) : 0,
			skb->len * q->pacing_divider);
	}
	return skb;
}

credit机制是DRR的核心。每个flow拥有一个credit计数器，初始值为quantum。每次从该flow出队一个报文，就减去其长度。当credit变为零或负数时，该flow被移到old_flows链表，并补充一个quantum的credit。调度器优先服务new_flows链表中的flow，只有new_flows为空时才处理old_flows。这种设计确保新产生的active flow不会被old_flows中的flow饿死。quantum参数的默认值在fq_change函数中设定：

static int fq_change(struct Qdisc *sch, struct nlattr *opt,
		     struct netlink_ext_ack *extack)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	struct tc_fq_qopt *ctl = nla_data(opt);

	if (ctl->quantum)
		q->quantum = ctl->quantum;
	else
		q->quantum = 2 * psched_mtu(qdisc_dev(sch));
	if (ctl->initial_quantum)
		q->initial_quantum = ctl->initial_quantum;
	else
		q->initial_quantum = 0;
	if (ctl->maxrate) {
		q->flow_max_rate = ctl->maxrate;
		q->rate_enable = 1;
	}
	if (ctl->pacing) {
		q->pacing_divisor = ctl->pacing;
	}
}

在credit计算中，fq_dequeue对每个flow出队后重新计算credit；flow加入链表时由fq_flow_add_to_list处理：

static void fq_flow_add_to_list(struct fq_sched_data *q,
				struct fq_flow *f)
{
	if (f->credit <= 0) {
		f->credit += q->quantum;
		list_add_tail(&f->flowchain, &q->old_flows);
	} else {
		list_add_tail(&f->flowchain, &q->new_flows);
	}
}

flow从new_flows转移到old_flows的条件是credit耗尽，转移后补充quantum credit。但如果补充后credit仍为正，则留在new_flows继续参与调度。这种设计保证credit用尽的flow暂时让出调度机会，实现按字节严格公平。per-flow的pacing依靠f->time_next_packet字段，hrtimer到期后才会调用fq_dequeue，控制报文发送速率。timer回调函数为fq_timer：

static enum hrtimer_restart fq_timer(struct hrtimer *timer)
{
	struct fq_sched_data *q = container_of(timer, struct fq_sched_data,
					       fq_timer);
	struct Qdisc *sch = q->sch;
	struct net_device *dev = qdisc_dev(sch);

	q->timer_active = false;
	__netif_schedule(sch);
	return HRTIMER_NORESTART;
}

timer仅触发一次调度，fq_dequeue在必要时重新启动新timer。这种defer机制将pacing粒度控制在纳秒级，适合高速网卡下精确流量整形。

相关新闻