/*
 * net/sched/sch_choke.c	CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/reciprocal_div.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <net/ipv6.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no flow state. The difference from RED is an additional
   step during enqueue: if the average queue size is above the low
   threshold (qmin), a packet is chosen at random from the queue. If both
   the new and the chosen packet belong to the same flow, both are dropped.

   Unlike RED, CHOKe is not really a "classful" qdisc because it needs to
   access packets in the queue at random. It has a minimal class interface
   to allow overriding the builtin flow classifier with filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless Active Queue
   Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004.
 */
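
/*
 * Illustrative configuration from user space (parameter values are
 * arbitrary examples, assuming the RED-style knobs of tc-choke(8)):
 *
 *	tc qdisc add dev eth0 root choke limit 1000 min 83 max 250 \
 *		avpkt 1000 burst 138 bandwidth 10mbit ecn
 */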

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE	(128*1024 - 1)

struct choke_sched_data {
/* Parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* Variables */
	struct tcf_proto *filter_list;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limits */
		u32	other;		/* Drops due to drop() calls */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask; /* size - 1 */

	struct sk_buff **tab;
};
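
/*
 * The queue lives in tab[], a power-of-two sized circular buffer: head
 * and tail are plain indices and tab_mask turns index arithmetic into
 * cheap masking. Random CHOKe drops leave NULL "holes" in the buffer,
 * which the zap helpers below skip over.
 */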

/* deliver a random number between 0 and N - 1 */
static u32 random_N(unsigned int N)
{
	return reciprocal_divide(random32(), N);
}

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	sch->qstats.backlog -= qdisc_pkt_len(skb);
	qdisc_drop(skb, sch);
	qdisc_tree_decrease_qlen(sch, 1);
	--sch->q.qlen;
}

/*
 * Compare the flows of two packets.
 * Returns true only if source and destination address and port match;
 * returns false for the special cases where the packets cannot be
 * compared (different protocol, hash mismatch, non-IP traffic).
 */
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	int off1, off2, poff;
	const u32 *ports1, *ports2;
	u8 ip_proto;
	__u32 hash1;

	if (skb1->protocol != skb2->protocol)
		return false;

	/* Use hash value as quick check
	 * Assumes that __skb_get_rxhash makes IP header and ports linear
	 */
	hash1 = skb_get_rxhash(skb1);
	if (!hash1 || hash1 != skb_get_rxhash(skb2))
		return false;

	/* Probably match, but be sure to avoid hash collisions */
	off1 = skb_network_offset(skb1);
	off2 = skb_network_offset(skb2);

	switch (skb1->protocol) {
	case __constant_htons(ETH_P_IP): {
		const struct iphdr *ip1, *ip2;

		ip1 = (const struct iphdr *) (skb1->data + off1);
		ip2 = (const struct iphdr *) (skb2->data + off2);

		ip_proto = ip1->protocol;
		if (ip_proto != ip2->protocol ||
		    ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
			return false;
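
		/* Fragments carry no usable ports: zeroing ip_proto makes
		 * proto_ports_offset() fail below, so the address match
		 * alone decides the result.
		 */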
		if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
			ip_proto = 0;
		off1 += ip1->ihl * 4;
		off2 += ip2->ihl * 4;
		break;
	}

	case __constant_htons(ETH_P_IPV6): {
		const struct ipv6hdr *ip1, *ip2;

		ip1 = (const struct ipv6hdr *) (skb1->data + off1);
		ip2 = (const struct ipv6hdr *) (skb2->data + off2);

		ip_proto = ip1->nexthdr;
		if (ip_proto != ip2->nexthdr ||
		    ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
		    ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
			return false;
		off1 += 40;
		off2 += 40;
		break;
	}

	default: /* Maybe compare MAC header here? */
		return false;
	}

	poff = proto_ports_offset(ip_proto);
	if (poff < 0)
		return true;

	off1 += poff;
	off2 += poff;
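
	/* The 16-bit source and destination ports sit next to each other
	 * in the transport header, so a single 32-bit load compares both
	 * at once.
	 */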
	ports1 = (__force u32 *)(skb1->data + off1);
	ports2 = (__force u32 *)(skb2->data + off2);
	return *ports1 == *ports2;
}

struct choke_skb_cb {
	u16 classid;
};
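
/* Per-packet private state is stashed in the skb control block; the
 * validate call below is a sanity check that it fits in the space the
 * qdisc layer reserves there.
 */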
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
	choke_skb_cb(skb)->classid = classid;
}

static u16 choke_get_classid(const struct sk_buff *skb)
{
	return choke_skb_cb(skb)->classid;
}

/*
 * Classify the packet using the attached TC filters and record the
 * resulting class id in the skb control block. Returns false if the
 * packet was stolen, queued, or shot by a filter action.
 */
static bool choke_classify(struct sk_buff *skb,
			   struct Qdisc *sch, int *qerr)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
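			/* fall through */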
		case TC_ACT_SHOT:
			return false;
		}
#endif
		choke_set_classid(skb, TC_H_MIN(res.classid));
		return true;
	}

	return false;
}

/*
 * Select a packet at random from the queue.
 * HACK: since the queue can have holes from previous deletions, retry
 * several times to find a random skb, then give up and return the head.
 * Returns NULL if the queue is empty (q->head == q->tail).
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retries = 3;

	do {
		*pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retries > 0);

	return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	if (q->filter_list)
		return choke_get_classid(nskb) == choke_get_classid(oskb);

	return choke_match_flow(oskb, nskb);
}
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct red_parms *p = &q->parms;
	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;

	if (q->filter_list) {
		/* If using external classifiers, get result and record it. */
		if (!choke_classify(skb, sch, &ret))
			goto other_drop;	/* Packet was eaten by filter */
	}

	/* Compute average queue usage (see RED) */
	p->qavg = red_calc_qavg(p, sch->q.qlen);
	if (red_is_idling(p))
		red_end_of_idle_period(p);

	/* Is queue small? */
	if (p->qavg <= p->qth_min)
		p->qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (p->qavg > p->qth_max) {
			p->qcount = -1;

			sch->qstats.overlimits++;
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++p->qcount) {
			if (red_mark_probability(p, p->qavg)) {
				p->qcount = 0;
				p->qR = red_random(p);

				sch->qstats.overlimits++;
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			p->qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		sch->qstats.backlog += qdisc_pkt_len(skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	sch->qstats.drops++;
	kfree_skb(skb);
	return NET_XMIT_DROP;

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;

other_drop:
	if (ret & __NET_XMIT_BYPASS)
		sch->qstats.drops++;
	kfree_skb(skb);
	return ret;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->parms))
			red_start_of_idle_period(&q->parms);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	sch->qstats.backlog -= qdisc_pkt_len(skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static unsigned int choke_drop(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (len > 0)
		q->stats.other++;
	else {
		if (!red_is_idling(&q->parms))
			red_start_of_idle_period(&q->parms);
	}

	return len;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	red_restart(&q->parms);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
};

static void choke_free(void *addr)
{
	if (addr) {
		if (is_vmalloc_addr(addr))
			vfree(addr);
		else
			kfree(addr);
	}
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;
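
	/* Size the table to the next power of two above the limit so that
	 * index wraparound is a single AND with the mask; e.g. a limit of
	 * 1000 packets gives a 1024-entry table and tab_mask 1023.
	 */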
	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
		if (!ntab)
			ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;
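
			/* Migrate packets from the old table, skipping
			 * holes; anything beyond the new capacity is
			 * dropped.
			 */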
			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				sch->qstats.backlog -= qdisc_pkt_len(skb);
				--sch->q.qlen;
				qdisc_drop(skb, sch);
			}
			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_CHOKE_STAB]));

	if (q->head == q->tail)
		red_end_of_idle_period(&q->parms);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	return choke_change(sch, opt);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	choke_free(q->tab);
}

static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}

static unsigned long choke_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static void choke_put(struct Qdisc *q, unsigned long cl)
{
}

static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
				u32 classid)
{
	return 0;
}

static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	if (!arg->stop) {
		if (arg->fn(sch, 1, arg) < 0) {
			arg->stop = 1;
			return;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops choke_class_ops = {
	.leaf		=	choke_leaf,
	.get		=	choke_get,
	.put		=	choke_put,
	.tcf_chain	=	choke_find_tcf,
	.bind_tcf	=	choke_bind,
	.unbind_tcf	=	choke_put,
	.dump		=	choke_dump_class,
	.walk		=	choke_walk,
};

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		=	"choke",
	.priv_size	=	sizeof(struct choke_sched_data),
	/* Hook up the class interface; without this, choke_class_ops is
	 * unreachable and filters can never be attached, leaving the
	 * filter_list classify path dead code.
	 */
	.cl_ops		=	&choke_class_ops,

	.enqueue	=	choke_enqueue,
	.dequeue	=	choke_dequeue,
	.peek		=	choke_peek_head,
	.drop		=	choke_drop,
	.init		=	choke_init,
	.destroy	=	choke_destroy,
	.reset		=	choke_reset,
	.change		=	choke_change,
	.dump		=	choke_dump,
	.dump_stats	=	choke_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");