Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 Red Hat |
| 3 | * based in parts on udlfb.c: |
| 4 | * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> |
| 5 | * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> |
| 6 | * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> |
| 7 | * |
| 8 | * This file is subject to the terms and conditions of the GNU General Public |
| 9 | * License v2. See the file COPYING in the main directory of this archive for |
| 10 | * more details. |
| 11 | */ |
| 12 | |
| 13 | #include <linux/module.h> |
| 14 | #include <linux/slab.h> |
| 15 | #include <linux/fb.h> |
| 16 | #include <linux/prefetch.h> |
| 17 | |
David Howells | 760285e | 2012-10-02 18:01:07 +0100 | [diff] [blame] | 18 | #include <drm/drmP.h> |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 19 | #include "udl_drv.h" |
| 20 | |
/*
 * Largest value a one-byte pixel count can carry. udl_compress_hline16()
 * caps a single command at MAX_CMD_PIXELS + 1 pixels.
 */
| 21 | #define MAX_CMD_PIXELS 255 |
| 22 | |
/*
 * RLX command sizing. The header written by udl_compress_hline16() is 7 bytes:
 * 0xaf, 0x6b, three device-address bytes, the command pixel count and the
 * first raw-span count. MIN_RLX_CMD_BYTES is the free space that function
 * requires before it starts another command.
 */
| 23 | #define RLX_HEADER_BYTES 7 |
| 24 | #define MIN_RLX_PIX_BYTES 4 |
| 25 | #define MIN_RLX_CMD_BYTES (RLX_HEADER_BYTES + MIN_RLX_PIX_BYTES) |
| 26 | |
/* RLE command sizing; not referenced by the code visible in this file. */
| 27 | #define RLE_HEADER_BYTES 6 |
| 28 | #define MIN_RLE_PIX_BYTES 3 |
| 29 | #define MIN_RLE_CMD_BYTES (RLE_HEADER_BYTES + MIN_RLE_PIX_BYTES) |
| 30 | |
/* RAW command sizing; not referenced by the code visible in this file. */
| 31 | #define RAW_HEADER_BYTES 6 |
| 32 | #define MIN_RAW_PIX_BYTES 2 |
| 33 | #define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES) |
| 34 | |
| 35 | /* |
| 36 | * Trims identical data from front and back of line |
| 37 | * Sets new front buffer address and width |
| 38 | * And returns byte count of identical pixels |
| 39 | * Assumes CPU natural alignment (unsigned long) |
| 40 | * for back and front buffer ptrs and width |
| 41 | */ |
| 42 | #if 0 |
/*
 * @bback:       shadow (back) copy of the line
 * @bfront:      in: front copy of the line; out: first word that differs
 *               (one past the compared words when nothing differs)
 * @width_bytes: in: line length in bytes; out: length of the differing span
 *
 * Comparison is done in whole unsigned long words; bytes beyond the last
 * whole word are not examined. Returns the number of bytes found identical
 * at the front plus the back of the line.
 */
static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes)
{
	const unsigned long *back = (const unsigned long *) bback;
	const unsigned long *front = (const unsigned long *) *bfront;
	const int width = *width_bytes / sizeof(unsigned long);
	int start = width;
	int end = width;
	int j, k;

	prefetch((void *) front);
	prefetch((void *) back);

	/* First differing word from the left; j == width if there is none. */
	for (j = 0; j < width; j++) {
		if (back[j] != front[j]) {
			start = j;
			break;
		}
	}

	/*
	 * Last differing word from the right. The scan must include word j
	 * itself: when it is the only word that differs, the span has to end
	 * at j + 1 rather than at the end of the line. When nothing differs,
	 * j == width and this loop does not run.
	 */
	for (k = width - 1; k >= j; k--) {
		if (back[k] != front[k]) {
			end = k + 1;
			break;
		}
	}

	*bfront = (u8 *) &front[start];
	*width_bytes = (end - start) * sizeof(unsigned long);

	return (start + (width - end)) * sizeof(unsigned long);
}
| 76 | #endif |
| 77 | |
/*
 * Pack a 32-bit pixel into a 16-bit 5-6-5 value: the top five bits of
 * bits 23..16, the top six bits of bits 15..8 and the top five bits of
 * bits 7..0. Bits 31..24 are discarded.
 *
 * Despite the name, no byte swapping happens here; the result is an ordinary
 * CPU-order integer.
 */
static inline u16 pixel32_to_be16(const uint32_t pixel)
{
	const uint32_t low_field = (pixel >> 3) & 0x001f;
	const uint32_t mid_field = (pixel >> 5) & 0x07e0;
	const uint32_t high_field = (pixel >> 8) & 0xf800;

	return high_field | mid_field | low_field;
}
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 84 | |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 85 | static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp) |
Chris Wilson | e90a4ea | 2013-01-18 16:31:14 +0000 | [diff] [blame] | 86 | { |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 87 | u16 pixel_val16 = 0; |
Chris Wilson | e90a4ea | 2013-01-18 16:31:14 +0000 | [diff] [blame] | 88 | if (bpp == 2) |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 89 | pixel_val16 = *(const uint16_t *)pixel; |
| 90 | else if (bpp == 4) |
| 91 | pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); |
| 92 | return pixel_val16; |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 93 | } |
| 94 | |
| 95 | /* |
| 96 | * Render a command stream for an encoded horizontal line segment of pixels. |
| 97 | * |
| 98 | * A command buffer holds several commands. |
| 99 | * It always begins with a fresh command header |
| 100 | * (the protocol doesn't require this, but we enforce it to allow |
| 101 | * multiple buffers to be potentially encoded and sent in parallel). |
| 102 | * A single command encodes one contiguous horizontal line of pixels |
| 103 | * |
| 104 | * The function relies on the client to do all allocation, so that |
| 105 | * rendering can be done directly to output buffers (e.g. USB URBs). |
| 106 | * The function fills the supplied command buffer, providing information |
| 107 | * on where it left off, so the client may call in again with additional |
| 108 | * buffers if the line will take several buffers to complete. |
| 109 | * |
| 110 | * A single command can transmit a maximum of 256 pixels, |
| 111 | * regardless of the compression ratio (protocol design limit). |
| 112 | * To the hardware, 0 for a size byte means 256 |
| 113 | * |
| 114 | * Rather than 256 pixel commands which are either rl or raw encoded, |
| 115 | * the rlx command simply assumes alternating raw and rl spans within one cmd. |
| 116 | * This has a slightly larger header overhead, but produces more even results. |
| 117 | * It also processes all data (read and write) in a single pass. |
| 118 | * Performance benchmarks of common cases show it having just slightly better |
| 119 | * compression than 256 pixel raw or rle commands, with similar CPU consumpion. |
| 120 | * But for very rl friendly data, will compress not quite as well. |
| 121 | */ |
| 122 | static void udl_compress_hline16( |
| 123 | const u8 **pixel_start_ptr, |
| 124 | const u8 *const pixel_end, |
| 125 | uint32_t *device_address_ptr, |
| 126 | uint8_t **command_buffer_ptr, |
| 127 | const uint8_t *const cmd_buffer_end, int bpp) |
| 128 | { |
| 129 | const u8 *pixel = *pixel_start_ptr; |
| 130 | uint32_t dev_addr = *device_address_ptr; |
| 131 | uint8_t *cmd = *command_buffer_ptr; |
| 132 | |
| 133 | while ((pixel_end > pixel) && |
| 134 | (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) { |
Sachin Kamat | 74401b1 | 2012-09-22 06:22:17 +0000 | [diff] [blame] | 135 | uint8_t *raw_pixels_count_byte = NULL; |
| 136 | uint8_t *cmd_pixels_count_byte = NULL; |
| 137 | const u8 *raw_pixel_start = NULL; |
| 138 | const u8 *cmd_pixel_start, *cmd_pixel_end = NULL; |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 139 | uint16_t pixel_val16; |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 140 | |
| 141 | prefetchw((void *) cmd); /* pull in one cache line at least */ |
| 142 | |
| 143 | *cmd++ = 0xaf; |
| 144 | *cmd++ = 0x6b; |
| 145 | *cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF); |
| 146 | *cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF); |
| 147 | *cmd++ = (uint8_t) ((dev_addr) & 0xFF); |
| 148 | |
| 149 | cmd_pixels_count_byte = cmd++; /* we'll know this later */ |
| 150 | cmd_pixel_start = pixel; |
| 151 | |
| 152 | raw_pixels_count_byte = cmd++; /* we'll know this later */ |
| 153 | raw_pixel_start = pixel; |
| 154 | |
| 155 | cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1, |
| 156 | min((int)(pixel_end - pixel) / bpp, |
| 157 | (int)(cmd_buffer_end - cmd) / 2))) * bpp; |
| 158 | |
| 159 | prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 160 | pixel_val16 = get_pixel_val16(pixel, bpp); |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 161 | |
| 162 | while (pixel < cmd_pixel_end) { |
Chris Wilson | e90a4ea | 2013-01-18 16:31:14 +0000 | [diff] [blame] | 163 | const u8 *const start = pixel; |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 164 | const uint16_t repeating_pixel_val16 = pixel_val16; |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 165 | |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 166 | *(uint16_t *)cmd = cpu_to_be16(pixel_val16); |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 167 | |
| 168 | cmd += 2; |
| 169 | pixel += bpp; |
| 170 | |
Haixia Shi | 8658444 | 2015-01-30 10:51:14 -0800 | [diff] [blame] | 171 | while (pixel < cmd_pixel_end) { |
| 172 | pixel_val16 = get_pixel_val16(pixel, bpp); |
| 173 | if (pixel_val16 != repeating_pixel_val16) |
| 174 | break; |
| 175 | pixel += bpp; |
| 176 | } |
| 177 | |
| 178 | if (unlikely(pixel > start + bpp)) { |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 179 | /* go back and fill in raw pixel count */ |
Chris Wilson | e90a4ea | 2013-01-18 16:31:14 +0000 | [diff] [blame] | 180 | *raw_pixels_count_byte = (((start - |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 181 | raw_pixel_start) / bpp) + 1) & 0xFF; |
| 182 | |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 183 | /* immediately after raw data is repeat byte */ |
Chris Wilson | e90a4ea | 2013-01-18 16:31:14 +0000 | [diff] [blame] | 184 | *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 185 | |
| 186 | /* Then start another raw pixel span */ |
| 187 | raw_pixel_start = pixel; |
| 188 | raw_pixels_count_byte = cmd++; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | if (pixel > raw_pixel_start) { |
| 193 | /* finalize last RAW span */ |
| 194 | *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; |
| 195 | } |
| 196 | |
| 197 | *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; |
| 198 | dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2; |
| 199 | } |
| 200 | |
| 201 | if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { |
| 202 | /* Fill leftover bytes with no-ops */ |
| 203 | if (cmd_buffer_end > cmd) |
| 204 | memset(cmd, 0xAF, cmd_buffer_end - cmd); |
| 205 | cmd = (uint8_t *) cmd_buffer_end; |
| 206 | } |
| 207 | |
| 208 | *command_buffer_ptr = cmd; |
| 209 | *pixel_start_ptr = pixel; |
| 210 | *device_address_ptr = dev_addr; |
| 211 | |
| 212 | return; |
| 213 | } |
| 214 | |
| 215 | /* |
| 216 | * There are 3 copies of every pixel: The front buffer that the fbdev |
| 217 | * client renders to, the actual framebuffer across the USB bus in hardware |
| 218 | * (that we can only write to, slowly, and can never read), and (optionally) |
| 219 | * our shadow copy that tracks what's been sent to that hardware buffer. |
| 220 | */ |
| 221 | int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, |
| 222 | const char *front, char **urb_buf_ptr, |
Dave Airlie | 3916e1d | 2012-11-01 13:47:09 +1000 | [diff] [blame] | 223 | u32 byte_offset, u32 device_byte_offset, |
| 224 | u32 byte_width, |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 225 | int *ident_ptr, int *sent_ptr) |
| 226 | { |
| 227 | const u8 *line_start, *line_end, *next_pixel; |
Dave Airlie | 3916e1d | 2012-11-01 13:47:09 +1000 | [diff] [blame] | 228 | u32 base16 = 0 + (device_byte_offset / bpp) * 2; |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 229 | struct urb *urb = *urb_ptr; |
| 230 | u8 *cmd = *urb_buf_ptr; |
| 231 | u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; |
| 232 | |
Chris Wilson | e90a4ea | 2013-01-18 16:31:14 +0000 | [diff] [blame] | 233 | BUG_ON(!(bpp == 2 || bpp == 4)); |
| 234 | |
Dave Airlie | 5320918 | 2010-12-15 07:14:24 +1000 | [diff] [blame] | 235 | line_start = (u8 *) (front + byte_offset); |
| 236 | next_pixel = line_start; |
| 237 | line_end = next_pixel + byte_width; |
| 238 | |
| 239 | while (next_pixel < line_end) { |
| 240 | |
| 241 | udl_compress_hline16(&next_pixel, |
| 242 | line_end, &base16, |
| 243 | (u8 **) &cmd, (u8 *) cmd_end, bpp); |
| 244 | |
| 245 | if (cmd >= cmd_end) { |
| 246 | int len = cmd - (u8 *) urb->transfer_buffer; |
| 247 | if (udl_submit_urb(dev, urb, len)) |
| 248 | return 1; /* lost pixels is set */ |
| 249 | *sent_ptr += len; |
| 250 | urb = udl_get_urb(dev); |
| 251 | if (!urb) |
| 252 | return 1; /* lost_pixels is set */ |
| 253 | *urb_ptr = urb; |
| 254 | cmd = urb->transfer_buffer; |
| 255 | cmd_end = &cmd[urb->transfer_buffer_length]; |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | *urb_buf_ptr = cmd; |
| 260 | |
| 261 | return 0; |
| 262 | } |
| 263 | |