123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549 |
- /*
- * linux/net/ipv4/inet_lro.c
- *
- * Large Receive Offload (ipv4 / tcp)
- *
- * (C) Copyright IBM Corp. 2007
- *
- * Authors:
- * Jan-Bernd Themann <themann@de.ibm.com>
- * Christoph Raisch <raisch@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
- #include <linux/module.h>
- #include <linux/if_vlan.h>
- #include <linux/inet_lro.h>
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
- MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
/*
 * Header lengths: the doff/ihl header fields are scaled by 4 (<< 2).
 * Arguments are parenthesized so that arbitrary pointer expressions
 * (e.g. "hdr + 1") expand correctly.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)

/* IP total length minus the IP and TCP header lengths */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Values of ihl/doff accepted by lro_tcp_ip_check() */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Default header length used when building an skb from page fragments */
#define LRO_MAX_PG_HLEN 64

/*
 * Bump one counter of the manager's statistics.  Wrapped in
 * do { } while (0) so the macro behaves as a single statement, also in
 * front of an "else".
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
- /*
- * Basic tcp checks whether packet is suitable for LRO
- */
- static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
- int len, const struct net_lro_desc *lro_desc)
- {
- /* check ip header: don't aggregate padded frames */
- if (ntohs(iph->tot_len) != len)
- return -1;
- if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
- return -1;
- if (iph->ihl != IPH_LEN_WO_OPTIONS)
- return -1;
- if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
- tcph->rst || tcph->syn || tcph->fin)
- return -1;
- if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
- return -1;
- if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
- tcph->doff != TCPH_LEN_W_TIMESTAMP)
- return -1;
- /* check tcp options (only timestamp allowed) */
- if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
- __be32 *topt = (__be32 *)(tcph + 1);
- if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
- return -1;
- /* timestamp should be in right order */
- topt++;
- if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
- ntohl(*topt)))
- return -1;
- /* timestamp reply should not be zero */
- topt++;
- if (*topt == 0)
- return -1;
- }
- return 0;
- }
- static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
- {
- struct iphdr *iph = lro_desc->iph;
- struct tcphdr *tcph = lro_desc->tcph;
- __be32 *p;
- __wsum tcp_hdr_csum;
- tcph->ack_seq = lro_desc->tcp_ack;
- tcph->window = lro_desc->tcp_window;
- if (lro_desc->tcp_saw_tstamp) {
- p = (__be32 *)(tcph + 1);
- *(p+2) = lro_desc->tcp_rcv_tsecr;
- }
- iph->tot_len = htons(lro_desc->ip_tot_len);
- iph->check = 0;
- iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
- tcph->check = 0;
- tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
- lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
- tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- lro_desc->ip_tot_len -
- IP_HDR_LEN(iph), IPPROTO_TCP,
- lro_desc->data_csum);
- }
- static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
- {
- __wsum tcp_csum;
- __wsum tcp_hdr_csum;
- __wsum tcp_ps_hdr_csum;
- tcp_csum = ~csum_unfold(tcph->check);
- tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
- tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- len + TCP_HDR_LEN(tcph),
- IPPROTO_TCP, 0);
- return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
- tcp_ps_hdr_csum);
- }
- static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
- struct iphdr *iph, struct tcphdr *tcph)
- {
- int nr_frags;
- __be32 *ptr;
- u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
- nr_frags = skb_shinfo(skb)->nr_frags;
- lro_desc->parent = skb;
- lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
- lro_desc->iph = iph;
- lro_desc->tcph = tcph;
- lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
- lro_desc->tcp_ack = tcph->ack_seq;
- lro_desc->tcp_window = tcph->window;
- lro_desc->pkt_aggr_cnt = 1;
- lro_desc->ip_tot_len = ntohs(iph->tot_len);
- if (tcph->doff == 8) {
- ptr = (__be32 *)(tcph+1);
- lro_desc->tcp_saw_tstamp = 1;
- lro_desc->tcp_rcv_tsval = *(ptr+1);
- lro_desc->tcp_rcv_tsecr = *(ptr+2);
- }
- lro_desc->mss = tcp_data_len;
- lro_desc->active = 1;
- lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
- tcp_data_len);
- }
- static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
- {
- memset(lro_desc, 0, sizeof(struct net_lro_desc));
- }
- static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
- struct tcphdr *tcph, int tcp_data_len)
- {
- struct sk_buff *parent = lro_desc->parent;
- __be32 *topt;
- lro_desc->pkt_aggr_cnt++;
- lro_desc->ip_tot_len += tcp_data_len;
- lro_desc->tcp_next_seq += tcp_data_len;
- lro_desc->tcp_window = tcph->window;
- lro_desc->tcp_ack = tcph->ack_seq;
- /* don't update tcp_rcv_tsval, would not work with PAWS */
- if (lro_desc->tcp_saw_tstamp) {
- topt = (__be32 *) (tcph + 1);
- lro_desc->tcp_rcv_tsecr = *(topt + 2);
- }
- lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
- lro_tcp_data_csum(iph, tcph,
- tcp_data_len),
- parent->len);
- parent->len += tcp_data_len;
- parent->data_len += tcp_data_len;
- if (tcp_data_len > lro_desc->mss)
- lro_desc->mss = tcp_data_len;
- }
- static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
- struct iphdr *iph, struct tcphdr *tcph)
- {
- struct sk_buff *parent = lro_desc->parent;
- int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
- lro_add_common(lro_desc, iph, tcph, tcp_data_len);
- skb_pull(skb, (skb->len - tcp_data_len));
- parent->truesize += skb->truesize;
- if (lro_desc->last_skb)
- lro_desc->last_skb->next = skb;
- else
- skb_shinfo(parent)->frag_list = skb;
- lro_desc->last_skb = skb;
- }
- static void lro_add_frags(struct net_lro_desc *lro_desc,
- int len, int hlen, int truesize,
- struct skb_frag_struct *skb_frags,
- struct iphdr *iph, struct tcphdr *tcph)
- {
- struct sk_buff *skb = lro_desc->parent;
- int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
- lro_add_common(lro_desc, iph, tcph, tcp_data_len);
- skb->truesize += truesize;
- skb_frags[0].page_offset += hlen;
- skb_frag_size_sub(&skb_frags[0], hlen);
- while (tcp_data_len > 0) {
- *(lro_desc->next_frag) = *skb_frags;
- tcp_data_len -= skb_frag_size(skb_frags);
- lro_desc->next_frag++;
- skb_frags++;
- skb_shinfo(skb)->nr_frags++;
- }
- }
- static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
- struct iphdr *iph,
- struct tcphdr *tcph)
- {
- if ((lro_desc->iph->saddr != iph->saddr) ||
- (lro_desc->iph->daddr != iph->daddr) ||
- (lro_desc->tcph->source != tcph->source) ||
- (lro_desc->tcph->dest != tcph->dest))
- return -1;
- return 0;
- }
- static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
- struct net_lro_desc *lro_arr,
- struct iphdr *iph,
- struct tcphdr *tcph)
- {
- struct net_lro_desc *lro_desc = NULL;
- struct net_lro_desc *tmp;
- int max_desc = lro_mgr->max_desc;
- int i;
- for (i = 0; i < max_desc; i++) {
- tmp = &lro_arr[i];
- if (tmp->active)
- if (!lro_check_tcp_conn(tmp, iph, tcph)) {
- lro_desc = tmp;
- goto out;
- }
- }
- for (i = 0; i < max_desc; i++) {
- if (!lro_arr[i].active) {
- lro_desc = &lro_arr[i];
- goto out;
- }
- }
- LRO_INC_STATS(lro_mgr, no_desc);
- out:
- return lro_desc;
- }
- static void lro_flush(struct net_lro_mgr *lro_mgr,
- struct net_lro_desc *lro_desc)
- {
- if (lro_desc->pkt_aggr_cnt > 1)
- lro_update_tcp_ip_header(lro_desc);
- skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
- if (lro_mgr->features & LRO_F_NAPI)
- netif_receive_skb(lro_desc->parent);
- else
- netif_rx(lro_desc->parent);
- LRO_INC_STATS(lro_mgr, flushed);
- lro_clear_desc(lro_desc);
- }
- static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
- void *priv)
- {
- struct net_lro_desc *lro_desc;
- struct iphdr *iph;
- struct tcphdr *tcph;
- u64 flags;
- int vlan_hdr_len = 0;
- if (!lro_mgr->get_skb_header ||
- lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
- &flags, priv))
- goto out;
- if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
- goto out;
- lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
- if (!lro_desc)
- goto out;
- if ((skb->protocol == htons(ETH_P_8021Q)) &&
- !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
- vlan_hdr_len = VLAN_HLEN;
- if (!lro_desc->active) { /* start new lro session */
- if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
- goto out;
- skb->ip_summed = lro_mgr->ip_summed_aggr;
- lro_init_desc(lro_desc, skb, iph, tcph);
- LRO_INC_STATS(lro_mgr, aggregated);
- return 0;
- }
- if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
- goto out2;
- if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
- goto out2;
- lro_add_packet(lro_desc, skb, iph, tcph);
- LRO_INC_STATS(lro_mgr, aggregated);
- if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
- lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
- lro_flush(lro_mgr, lro_desc);
- return 0;
- out2: /* send aggregated SKBs to stack */
- lro_flush(lro_mgr, lro_desc);
- out:
- return 1;
- }
- static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
- struct skb_frag_struct *frags,
- int len, int true_size,
- void *mac_hdr,
- int hlen, __wsum sum,
- u32 ip_summed)
- {
- struct sk_buff *skb;
- struct skb_frag_struct *skb_frags;
- int data_len = len;
- int hdr_len = min(len, hlen);
- skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
- if (!skb)
- return NULL;
- skb_reserve(skb, lro_mgr->frag_align_pad);
- skb->len = len;
- skb->data_len = len - hdr_len;
- skb->truesize += true_size;
- skb->tail += hdr_len;
- memcpy(skb->data, mac_hdr, hdr_len);
- skb_frags = skb_shinfo(skb)->frags;
- while (data_len > 0) {
- *skb_frags = *frags;
- data_len -= skb_frag_size(frags);
- skb_frags++;
- frags++;
- skb_shinfo(skb)->nr_frags++;
- }
- skb_shinfo(skb)->frags[0].page_offset += hdr_len;
- skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
- skb->ip_summed = ip_summed;
- skb->csum = sum;
- skb->protocol = eth_type_trans(skb, lro_mgr->dev);
- return skb;
- }
- static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
- struct skb_frag_struct *frags,
- int len, int true_size,
- void *priv, __wsum sum)
- {
- struct net_lro_desc *lro_desc;
- struct iphdr *iph;
- struct tcphdr *tcph;
- struct sk_buff *skb;
- u64 flags;
- void *mac_hdr;
- int mac_hdr_len;
- int hdr_len = LRO_MAX_PG_HLEN;
- int vlan_hdr_len = 0;
- if (!lro_mgr->get_frag_header ||
- lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
- (void *)&tcph, &flags, priv)) {
- mac_hdr = skb_frag_address(frags);
- goto out1;
- }
- if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
- goto out1;
- hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
- mac_hdr_len = (int)((void *)(iph) - mac_hdr);
- lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
- if (!lro_desc)
- goto out1;
- if (!lro_desc->active) { /* start new lro session */
- if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
- goto out1;
- skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
- hdr_len, 0, lro_mgr->ip_summed_aggr);
- if (!skb)
- goto out;
- if ((skb->protocol == htons(ETH_P_8021Q)) &&
- !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
- vlan_hdr_len = VLAN_HLEN;
- iph = (void *)(skb->data + vlan_hdr_len);
- tcph = (void *)((u8 *)skb->data + vlan_hdr_len
- + IP_HDR_LEN(iph));
- lro_init_desc(lro_desc, skb, iph, tcph);
- LRO_INC_STATS(lro_mgr, aggregated);
- return NULL;
- }
- if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
- goto out2;
- if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
- goto out2;
- lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
- LRO_INC_STATS(lro_mgr, aggregated);
- if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
- lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
- lro_flush(lro_mgr, lro_desc);
- return NULL;
- out2: /* send aggregated packets to the stack */
- lro_flush(lro_mgr, lro_desc);
- out1: /* Original packet has to be posted to the stack */
- skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
- hdr_len, sum, lro_mgr->ip_summed);
- out:
- return skb;
- }
- void lro_receive_skb(struct net_lro_mgr *lro_mgr,
- struct sk_buff *skb,
- void *priv)
- {
- if (__lro_proc_skb(lro_mgr, skb, priv)) {
- if (lro_mgr->features & LRO_F_NAPI)
- netif_receive_skb(skb);
- else
- netif_rx(skb);
- }
- }
- EXPORT_SYMBOL(lro_receive_skb);
- void lro_receive_frags(struct net_lro_mgr *lro_mgr,
- struct skb_frag_struct *frags,
- int len, int true_size, void *priv, __wsum sum)
- {
- struct sk_buff *skb;
- skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
- if (!skb)
- return;
- if (lro_mgr->features & LRO_F_NAPI)
- netif_receive_skb(skb);
- else
- netif_rx(skb);
- }
- EXPORT_SYMBOL(lro_receive_frags);
- void lro_flush_all(struct net_lro_mgr *lro_mgr)
- {
- int i;
- struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
- for (i = 0; i < lro_mgr->max_desc; i++) {
- if (lro_desc[i].active)
- lro_flush(lro_mgr, &lro_desc[i]);
- }
- }
- EXPORT_SYMBOL(lro_flush_all);
- void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
- struct iphdr *iph, struct tcphdr *tcph)
- {
- struct net_lro_desc *lro_desc;
- lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
- if (lro_desc->active)
- lro_flush(lro_mgr, lro_desc);
- }
- EXPORT_SYMBOL(lro_flush_pkt);
|