net: infrastructure for hardware time stamping

The additional per-packet information (16 bytes for time stamps, 1
byte for flags) is stored for all packets in the skb_shared_info
struct. This implementation detail is hidden from users of that
information via skb_* accessor functions. A separate struct (for the
time stamps) and a separate union (for the flags) are used for the
additional information so that it can be stored/copied easily outside
of skb_shared_info.

Compared to previous implementations (reusing the tstamp field
depending on the context, optional additional structures) this
is the simplest solution. It does not extend sk_buff itself.

TX time stamping is implemented in software if the device driver
doesn't support hardware time stamping.

The new semantic for hardware/software time stamping around
ndo_start_xmit() is based on two assumptions about existing
network device drivers which don't support hardware time
stamping and know nothing about it:
 - they leave the new skb_shared_tx unmodified
 - they keep the connection to the originating socket in skb->sk
   alive, i.e., don't call skb_orphan()

Given that skb_shared_tx is new, the first assumption is safe.
The second is only true for some drivers. As a result, software
TX time stamping currently works with the bnx2 driver, but not
with the unmodified igb driver (the two drivers this patch series
was tested with).

Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Patrick Ohly 2009-02-12 05:03:37 +00:00 committed by David S. Miller
parent cb9eff0978
commit ac45f602ee
3 changed files with 161 additions and 3 deletions

View file

@ -132,6 +132,57 @@ struct skb_frag_struct {
__u32 size;
};
/*
 * NOTE(review): presumably a feature-test macro that lets other code
 * detect at compile time that the hardware time stamping structures
 * below exist -- no user is visible here, confirm before relying on it.
 */
#define HAVE_HW_TIME_STAMP
/**
 * struct skb_shared_hwtstamps - hardware time stamps
 * @hwtstamp:	hardware time stamp transformed into duration
 *		since arbitrary point in time
 * @syststamp:	hwtstamp transformed to system time base
 *
 * Software time stamps generated by ktime_get_real() are stored in
 * skb->tstamp. The relation between the different kinds of time
 * stamps is as follows:
 *
 * - syststamp and tstamp can be compared against each other in
 *   arbitrary combinations. The accuracy of a
 *   syststamp/tstamp/"syststamp from other device" comparison is
 *   limited by the accuracy of the transformation into system time
 *   base. This depends on the device driver and its underlying
 *   hardware.
 *
 * - hwtstamp values can only be compared against other hwtstamp
 *   values from the same device.
 *
 * This structure is attached to packets as part of the
 * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
 */
struct skb_shared_hwtstamps {
ktime_t hwtstamp;
ktime_t syststamp;
};
/**
 * union skb_shared_tx - instructions for time stamping of outgoing packets
 * @hardware:		generate hardware time stamp
 * @software:		generate software time stamp
 * @in_progress:	device driver is going to provide
 *			hardware time stamp
 * @flags:		the bit-fields above overlaid as a single byte, so
 *			that all of them can be read or cleared at once
 *
 * These flags are attached to packets as part of the
 * &skb_shared_info. Use skb_tx() to get a pointer.
 */
union skb_shared_tx {
struct {
__u8 hardware:1,
software:1,
in_progress:1;
};
__u8 flags;
};
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
@ -143,10 +194,12 @@ struct skb_shared_info {
unsigned short gso_segs;
unsigned short gso_type;
__be32 ip6_frag_id;
union skb_shared_tx tx_flags;
#ifdef CONFIG_HAS_DMA
unsigned int num_dma_maps;
#endif
struct sk_buff *frag_list;
struct skb_shared_hwtstamps hwtstamps;
skb_frag_t frags[MAX_SKB_FRAGS];
#ifdef CONFIG_HAS_DMA
dma_addr_t dma_maps[MAX_SKB_FRAGS + 1];
@ -465,6 +518,16 @@ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
/*
 * Internal: interpret the address returned by skb_end_pointer(SKB) as the
 * struct skb_shared_info of that buffer. Callers outside the skb core
 * should prefer accessors such as skb_hwtstamps() and skb_tx().
 */
#define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
{
return &skb_shinfo(skb)->hwtstamps;
}
static inline union skb_shared_tx *skb_tx(struct sk_buff *skb)
{
return &skb_shinfo(skb)->tx_flags;
}
/**
* skb_queue_empty - check if a queue is empty
* @list: queue head
@ -1730,6 +1793,11 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
extern void skb_init(void);
/**
 * skb_get_ktime - read the time stamp of a skb as ktime_t
 * @skb: skb to get stamp from
 *
 * Returns the value of skb->tstamp without any conversion.
 */
static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
{
	const ktime_t stamp = skb->tstamp;

	return stamp;
}
/**
* skb_get_timestamp - get timestamp from a skb
* @skb: skb to get stamp from
@ -1739,11 +1807,18 @@ extern void skb_init(void);
* This function converts the offset back to a struct timeval and stores
* it in stamp.
*/
static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
static inline void skb_get_timestamp(const struct sk_buff *skb,
struct timeval *stamp)
{
*stamp = ktime_to_timeval(skb->tstamp);
}
static inline void skb_get_timestampns(const struct sk_buff *skb,
struct timespec *stamp)
{
*stamp = ktime_to_timespec(skb->tstamp);
}
static inline void __net_timestamp(struct sk_buff *skb)
{
skb->tstamp = ktime_get_real();
@ -1759,6 +1834,20 @@ static inline ktime_t net_invalid_timestamp(void)
return ktime_set(0, 0);
}
/**
 * skb_tstamp_tx - queue clone of skb with send time stamps
 * @orig_skb:	the original outgoing packet
 * @hwtstamps:	hardware time stamps, may be NULL if not available
 *
 * If the skb has a socket associated, then this function clones the
 * skb (thus sharing the actual data and optional structures), stores
 * the optional hardware time stamping information (if non NULL) or
 * generates a software time stamp (otherwise), then queues the clone
 * to the error queue of the socket.
 *
 * Nothing is returned: a missing socket, a failed clone or a failed
 * queueing attempt are all silently ignored.
 */
extern void skb_tstamp_tx(struct sk_buff *orig_skb,
struct skb_shared_hwtstamps *hwtstamps);
extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
extern __sum16 __skb_checksum_complete(struct sk_buff *skb);

View file

@ -1672,10 +1672,21 @@ static int dev_gso_segment(struct sk_buff *skb)
return 0;
}
static void tstamp_tx(struct sk_buff *skb)
{
union skb_shared_tx *shtx =
skb_tx(skb);
if (unlikely(shtx->software &&
!shtx->in_progress)) {
skb_tstamp_tx(skb, NULL);
}
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc;
prefetch(&dev->netdev_ops->ndo_start_xmit);
if (likely(!skb->next)) {
@ -1689,13 +1700,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
return ops->ndo_start_xmit(skb, dev);
rc = ops->ndo_start_xmit(skb, dev);
/*
* TODO: if skb_orphan() was called by
* ops->ndo_start_xmit() (for example, the unmodified
* igb driver does that; bnx2 doesn't), then skb->sk is
* gone and tstamp_tx() will be unable to send back the
* time stamp.
*
* How can this be prevented? Always create another
* reference to the socket before calling
* ops->ndo_start_xmit()? Or prevent skb_orphan() from
* doing anything inside ops->ndo_start_xmit() by clearing
* the skb destructor before the call and restoring it
* afterwards, then doing the skb_orphan() ourselves?
*/
if (likely(!rc))
tstamp_tx(skb);
return rc;
}
gso:
do {
struct sk_buff *nskb = skb->next;
int rc;
skb->next = nskb->next;
nskb->next = NULL;
@ -1705,6 +1732,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->next = nskb;
return rc;
}
tstamp_tx(skb);
if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);

View file

@ -55,6 +55,7 @@
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <net/protocol.h>
#include <net/dst.h>
@ -215,7 +216,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo->gso_segs = 0;
shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
shinfo->tx_flags.flags = 0;
shinfo->frag_list = NULL;
memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
if (fclone) {
struct sk_buff *child = skb + 1;
@ -2945,6 +2948,44 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
}
EXPORT_SYMBOL_GPL(skb_cow_data);
/*
 * Clone @orig_skb, attach either the supplied hardware time stamps or a
 * freshly taken software time stamp, and queue the clone on the error
 * queue of the originating socket. Every failure is silently ignored.
 */
void skb_tstamp_tx(struct sk_buff *orig_skb,
		   struct skb_shared_hwtstamps *hwtstamps)
{
	struct sock *sk = orig_skb->sk;
	struct sock_exterr_skb *serr;
	struct sk_buff *clone;

	/* Without an owning socket there is nobody to report to. */
	if (!sk)
		return;

	clone = skb_clone(orig_skb, GFP_ATOMIC);
	if (!clone)
		return;

	if (!hwtstamps) {
		/*
		 * No hardware time stamps available: leave the
		 * skb_shared_tx flags alone and record only a
		 * software time stamp.
		 */
		clone->tstamp = ktime_get_real();
	} else {
		/*
		 * NOTE(review): the shared info is presumably shared
		 * between clone and original, so this store would be
		 * visible through orig_skb as well -- confirm.
		 */
		*skb_hwtstamps(clone) = *hwtstamps;
	}

	/* Tag the clone so the receiver can identify it as a time stamp. */
	serr = SKB_EXT_ERR(clone);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;

	/* If queueing fails, free the clone here. */
	if (sock_queue_err_skb(sk, clone))
		kfree_skb(clone);
}
EXPORT_SYMBOL_GPL(skb_tstamp_tx);
/**
* skb_partial_csum_set - set up and verify partial csum values for packet
* @skb: the skb to set