xfrm: iptfs: add skb-fragment sharing code

Avoid copying the inner packet data by sharing the skb data fragments
from the output packet skb into new inner packet skb.

Signed-off-by: Christian Hopps <chopps@labn.net>
Tested-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
This commit is contained in:
Christian Hopps 2024-11-14 02:07:10 -05:00 committed by Steffen Klassert
parent 3f3339885f
commit 5f2b6a9095
1 changed file with 290 additions and 6 deletions

View File

@@ -81,6 +81,9 @@
/* Minimum skb headroom reserved when (re)allocating for L3/L2 delivery —
 * presumably sized for encap headers; confirm against the alloc call sites.
 */
#define XFRM_IPTFS_MIN_L3HEADROOM 128
#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)

/* Min to try to share outer iptfs skb data vs copying into new skb */
#define IPTFS_PKT_SHARE_MIN 129

/* Nanoseconds per microsecond */
#define NSECS_IN_USEC 1000

#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
@@ -234,10 +237,254 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
}
/**
 * struct iptfs_skb_frag_walk - state for walking an skb's data fragments
 * @fragi: current fragment index
 * @past: length of data in fragments before @fragi
 * @total: length of data in all fragments
 * @nr_frags: number of fragments present in array
 * @initial_offset: the value passed in to skb_prepare_frag_walk()
 * @frags: the page fragments; one extra slot beyond MAX_SKB_FRAGS so a
 *	page-backed skb head can also be represented as a fragment
 * @pp_recycle: copy of skb->pp_recycle, checked against the destination
 *	skb before sharing frags (page-pool pages must not be mixed)
 */
struct iptfs_skb_frag_walk {
	u32 fragi;
	u32 past;
	u32 total;
	u32 nr_frags;
	u32 initial_offset;
	skb_frag_t frags[MAX_SKB_FRAGS + 1];
	bool pp_recycle;
};
/**
 * iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
 * @skb: the skb to walk.
 * @initial_offset: start the walk @initial_offset into the skb.
 * @walk: the walk to initialize
 *
 * Future calls to iptfs_skb_add_frags() will expect the @offset value to be
 * at least @initial_offset large.
 */
static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
					struct iptfs_skb_frag_walk *walk)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frag, *from;
	u32 i;

	walk->initial_offset = initial_offset;
	walk->fragi = 0;
	walk->past = 0;
	walk->total = 0;
	walk->nr_frags = 0;
	walk->pp_recycle = skb->pp_recycle;

	if (skb->head_frag) {
		if (initial_offset >= skb_headlen(skb)) {
			/* Walk starts beyond the linear head; skip over it. */
			initial_offset -= skb_headlen(skb);
		} else {
			/* Head is page-backed: represent it as the first
			 * fragment, trimmed so it starts at @initial_offset.
			 */
			frag = &walk->frags[walk->nr_frags++];
			iptfs_skb_head_to_frag(skb, frag);
			frag->offset += initial_offset;
			frag->len -= initial_offset;
			walk->total += frag->len;
			initial_offset = 0;
		}
	} else {
		/* Head is not page-backed so it cannot be shared; this
		 * assumes @initial_offset >= skb_headlen(skb) (the callers
		 * check head_frag or skb->len == skb->data_len first) —
		 * otherwise this u32 subtraction would wrap.
		 */
		initial_offset -= skb_headlen(skb);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		from = &shinfo->frags[i];
		if (initial_offset >= from->len) {
			/* Fragment lies entirely before the start offset. */
			initial_offset -= from->len;
			continue;
		}
		frag = &walk->frags[walk->nr_frags++];
		*frag = *from;
		if (initial_offset) {
			/* First included fragment: trim the leading bytes. */
			frag->offset += initial_offset;
			frag->len -= initial_offset;
			initial_offset = 0;
		}
		walk->total += frag->len;
	}
}
/* Position @walk on the fragment containing @offset (an offset into the
 * original skb, as with iptfs_skb_prepare_frag_walk()), moving backward or
 * forward from the walk's current position as needed.
 *
 * Return: @offset made relative to the start of the located fragment.
 */
static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
				     u32 offset)
{
	/* Convert to the walk's internal coordinate space. */
	offset -= walk->initial_offset;

	/* Rewind while the target lies before the current fragment. */
	while (offset < walk->past)
		walk->past -= walk->frags[--walk->fragi].len;

	/* Advance while the target lies beyond the current fragment. */
	while (offset >= walk->past + walk->frags[walk->fragi].len)
		walk->past += walk->frags[walk->fragi++].len;

	/* Now relative to the current fragment. */
	return offset - walk->past;
}
/**
 * iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
 * @skb: skb to check for adding frags to
 * @walk: the walk that will be used as source for frags.
 * @offset: offset from beginning of original skb to start from.
 * @len: amount of data to add frag references to in @skb.
 *
 * Return: true if ok to add frags.
 */
static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
				    struct iptfs_skb_frag_walk *walk,
				    u32 offset, u32 len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	u32 fragi, nr_frags, fraglen;

	/* Frag lists and mismatched page-pool recycling preclude sharing. */
	if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
		return false;

	/* Make offset relative to current frag after setting that */
	offset = iptfs_skb_reset_frag_walk(walk, offset);

	/* Count the fragment slots @len bytes would consume starting at the
	 * walk's current fragment; fail only if the destination's fragment
	 * array would overflow.
	 */
	nr_frags = shinfo->nr_frags;
	for (fragi = walk->fragi; len && fragi < walk->nr_frags; fragi++) {
		/* @offset is nonzero only for the first fragment. */
		fraglen = walk->frags[fragi].len - offset;
		offset = 0;
		if (++nr_frags > MAX_SKB_FRAGS)
			return false;
		if (len <= fraglen)
			return true;
		len -= fraglen;
	}
	/* We may not copy all @len but what we have will fit. */
	return true;
}
/**
 * iptfs_skb_add_frags() - add a range of fragment references into an skb
 * @skb: skb to add references into
 * @walk: the walk to add referenced fragments from.
 * @offset: offset from beginning of original skb to start from.
 * @len: amount of data to add frag references to in @skb.
 *
 * iptfs_skb_can_add_frags() should be called before this function to verify
 * that the destination @skb is compatible with the walk and has space in the
 * array for the to be added frag references.
 *
 * Return: The number of bytes not added to @skb b/c we reached the end of the
 * walk before adding all of @len.
 */
static int iptfs_skb_add_frags(struct sk_buff *skb,
			       struct iptfs_skb_frag_walk *walk, u32 offset,
			       u32 len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	u32 fraglen;

	/* Nothing to share from, or offset is past the end of the walk. */
	if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
		return len;

	/* make offset relative to current frag after setting that */
	offset = iptfs_skb_reset_frag_walk(walk, offset);

	while (len && walk->fragi < walk->nr_frags) {
		skb_frag_t *frag = &walk->frags[walk->fragi];
		skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];

		*tofrag = *frag;
		if (offset) {
			/* First fragment only: trim to start at @offset. */
			tofrag->offset += offset;
			tofrag->len -= offset;
			offset = 0;
		}
		/* Take a page reference for the new shared fragment. */
		__skb_frag_ref(tofrag);
		shinfo->nr_frags++;

		/* see if we are done */
		fraglen = tofrag->len;
		if (len < fraglen) {
			/* Last fragment is only partially used: shrink the
			 * destination copy; the source frag stays intact.
			 */
			tofrag->len = len;
			skb->len += len;
			skb->data_len += len;
			return 0;
		}

		/* advance to next source fragment */
		len -= fraglen;		   /* careful, use dst frag len (may */
		skb->len += fraglen;	   /* be shorter than the src len due */
		skb->data_len += fraglen;  /* to the offset trim above) */
		walk->past += frag->len;   /* careful, use src (untrimmed) len */
		walk->fragi++;
	}
	return len;
}
/* ================================== */
/* IPTFS Receiving (egress) Functions */
/* ================================== */
/**
 * iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
 * @tpl: template to create new skb from.
 * @walk: The source for fragments to add.
 * @off: The offset into @walk to add frags from, also used with @st and
 *	 @copy_len.
 * @len: The length of data to add covering frags from @walk into the new skb.
 *	 This must be >= @copy_len.
 * @st: The sequence state to copy from into the new head skb.
 * @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
 *	      linear space.
 *
 * Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes from
 * @st into the new skb linear space, and then add shared fragments from the
 * frag walk for the remaining @len of data (i.e., @len - @copy_len bytes).
 *
 * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
 */
static struct sk_buff *
iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
		     u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
{
	struct sk_buff *skb;

	skb = iptfs_alloc_skb(tpl, copy_len, false);
	if (!skb)
		return NULL;

	/* this should not normally be happening -- the caller is expected to
	 * have verified compatibility/space before choosing the share path
	 */
	if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len,
				     len - copy_len)) {
		kfree_skb(skb);
		return NULL;
	}

	/* Copy the leading @copy_len bytes into the new linear area. */
	if (copy_len &&
	    skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) {
		XFRM_INC_STATS(dev_net(st->root_skb->dev),
			       LINUX_MIB_XFRMINERROR);
		kfree_skb(skb);
		return NULL;
	}

	/* Share the remaining @len - @copy_len bytes as fragment refs. */
	iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len);
	return skb;
}
/**
* iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
* @skblen: the total data size for `skb`.
@@ -423,6 +670,8 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
struct skb_seq_state *st, struct sk_buff *skb,
u32 data, u32 blkoff, struct list_head *list)
{
struct iptfs_skb_frag_walk _fragwalk;
struct iptfs_skb_frag_walk *fragwalk = NULL;
struct sk_buff *newskb = xtfs->ra_newskb;
u32 remaining = skb->len - data;
u32 runtlen = xtfs->ra_runtlen;
@@ -567,10 +816,26 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
fraglen = min(blkoff, remaining);
copylen = min(fraglen, ipremain);
/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) {
XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR);
goto abandon;
/* If we may have the opportunity to share prepare a fragwalk. */
if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
(skb->head_frag || skb->len == skb->data_len) &&
skb->pp_recycle == newskb->pp_recycle) {
fragwalk = &_fragwalk;
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
}
/* Try share then copy. */
if (fragwalk &&
iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
} else {
/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
copylen)) {
XFRM_INC_STATS(xs_net(xtfs->x),
LINUX_MIB_XFRMINBUFFERERROR);
goto abandon;
}
}
if (copylen < ipremain) {
@@ -601,6 +866,8 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
struct list_head *sublist)
{
u8 hbytes[sizeof(struct ipv6hdr)];
struct iptfs_skb_frag_walk _fragwalk;
struct iptfs_skb_frag_walk *fragwalk = NULL;
struct sk_buff *defer, *first_skb, *next, *skb;
const unsigned char *old_mac;
struct xfrm_iptfs_data *xtfs;
@@ -694,6 +961,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
} else {
first_skb = skb;
first_iplen = iplen;
fragwalk = NULL;
/* We are going to skip over `data` bytes to reach the
* start of the IP header of `iphlen` len for `iplen`
@@ -745,6 +1013,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
/* all pointers could be changed now reset walk */
skb_abort_seq_read(skbseq);
skb_prepare_seq_read(skb, data, tail, skbseq);
} else if (skb->head_frag &&
/* We have the IP header right now */
remaining >= iphlen) {
fragwalk = &_fragwalk;
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
defer = skb;
skb = NULL;
} else {
/* We couldn't reuse the input skb so allocate a
* new one.
@@ -760,8 +1035,17 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
capturelen = min(iplen, remaining);
if (!skb) {
skb = iptfs_pskb_extract_seq(iplen, skbseq, data,
capturelen);
if (!fragwalk ||
/* Large enough to be worth sharing */
iplen < IPTFS_PKT_SHARE_MIN ||
/* Have IP header + some data to share. */
capturelen <= iphlen ||
/* Try creating skb and adding frags */
!(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
data, capturelen,
skbseq, iphlen))) {
skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen);
}
if (!skb) {
/* skip to next packet or done */
data += capturelen;