drm/udl: Inline memcmp() for RLE compression of xfer
Because we pass a variable length to memcmp(), the compiler does not realise that it is always a fixed value of either 2 or 4 bytes. Instead of performing the comparison inline, the compiler inserts a call to the generic memcmp() routine, which is optimised for long comparisons of variable length. That turns out to be quite expensive...
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
bcb39af448
commit
e90a4ea534
1 changed files with 27 additions and 17 deletions
|
@ -75,15 +75,19 @@ static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline u16 pixel32_to_be16p(const uint8_t *pixel)
|
static inline u16 pixel32_to_be16(const uint32_t pixel)
|
||||||
{
|
{
|
||||||
uint32_t pix = *(uint32_t *)pixel;
|
return (((pixel >> 3) & 0x001f) |
|
||||||
u16 retval;
|
((pixel >> 5) & 0x07e0) |
|
||||||
|
((pixel >> 8) & 0xf800));
|
||||||
|
}
|
||||||
|
|
||||||
retval = (((pix >> 3) & 0x001f) |
|
static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
|
||||||
((pix >> 5) & 0x07e0) |
|
{
|
||||||
((pix >> 8) & 0xf800));
|
if (bpp == 2)
|
||||||
return retval;
|
return *(const uint16_t *)pixel == repeat;
|
||||||
|
else
|
||||||
|
return *(const uint32_t *)pixel == repeat;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -152,29 +156,33 @@ static void udl_compress_hline16(
|
||||||
prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
|
prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
|
||||||
|
|
||||||
while (pixel < cmd_pixel_end) {
|
while (pixel < cmd_pixel_end) {
|
||||||
const u8 * const repeating_pixel = pixel;
|
const u8 *const start = pixel;
|
||||||
|
u32 repeating_pixel;
|
||||||
|
|
||||||
if (bpp == 2)
|
if (bpp == 2) {
|
||||||
*(uint16_t *)cmd = cpu_to_be16p((uint16_t *)pixel);
|
repeating_pixel = *(uint16_t *)pixel;
|
||||||
else if (bpp == 4)
|
*(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
|
||||||
*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16p(pixel));
|
} else {
|
||||||
|
repeating_pixel = *(uint32_t *)pixel;
|
||||||
|
*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
|
||||||
|
}
|
||||||
|
|
||||||
cmd += 2;
|
cmd += 2;
|
||||||
pixel += bpp;
|
pixel += bpp;
|
||||||
|
|
||||||
if (unlikely((pixel < cmd_pixel_end) &&
|
if (unlikely((pixel < cmd_pixel_end) &&
|
||||||
(!memcmp(pixel, repeating_pixel, bpp)))) {
|
(pixel_repeats(pixel, repeating_pixel, bpp)))) {
|
||||||
/* go back and fill in raw pixel count */
|
/* go back and fill in raw pixel count */
|
||||||
*raw_pixels_count_byte = (((repeating_pixel -
|
*raw_pixels_count_byte = (((start -
|
||||||
raw_pixel_start) / bpp) + 1) & 0xFF;
|
raw_pixel_start) / bpp) + 1) & 0xFF;
|
||||||
|
|
||||||
while ((pixel < cmd_pixel_end)
|
while ((pixel < cmd_pixel_end) &&
|
||||||
&& (!memcmp(pixel, repeating_pixel, bpp))) {
|
(pixel_repeats(pixel, repeating_pixel, bpp))) {
|
||||||
pixel += bpp;
|
pixel += bpp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* immediately after raw data is repeat byte */
|
/* immediately after raw data is repeat byte */
|
||||||
*cmd++ = (((pixel - repeating_pixel) / bpp) - 1) & 0xFF;
|
*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
|
||||||
|
|
||||||
/* Then start another raw pixel span */
|
/* Then start another raw pixel span */
|
||||||
raw_pixel_start = pixel;
|
raw_pixel_start = pixel;
|
||||||
|
@ -223,6 +231,8 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
|
||||||
u8 *cmd = *urb_buf_ptr;
|
u8 *cmd = *urb_buf_ptr;
|
||||||
u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;
|
u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;
|
||||||
|
|
||||||
|
BUG_ON(!(bpp == 2 || bpp == 4));
|
||||||
|
|
||||||
line_start = (u8 *) (front + byte_offset);
|
line_start = (u8 *) (front + byte_offset);
|
||||||
next_pixel = line_start;
|
next_pixel = line_start;
|
||||||
line_end = next_pixel + byte_width;
|
line_end = next_pixel + byte_width;
|
||||||
|
|
Loading…
Reference in a new issue