blob: 2ffa1031c91f67483366f2a226397b2a43ffac1d [file] [log] [blame]
Dan Streetmaned70b472015-05-07 13:49:21 -04001/*
2 * Cryptographic API for the NX-842 hardware compression.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * Copyright (C) IBM Corporation, 2011-2015
15 *
16 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
17 * Seth Jennings <sjenning@linux.vnet.ibm.com>
18 *
19 * Rewrite: Dan Streetman <ddstreet@ieee.org>
20 *
21 * This is an interface to the NX-842 compression hardware in PowerPC
 * processors. Most of the complexity of this driver is due to the fact that
23 * the NX-842 compression hardware requires the input and output data buffers
24 * to be specifically aligned, to be a specific multiple in length, and within
25 * specific minimum and maximum lengths. Those restrictions, provided by the
26 * nx-842 driver via nx842_constraints, mean this driver must use bounce
27 * buffers and headers to correct misaligned in or out buffers, and to split
28 * input buffers that are too large.
29 *
30 * This driver will fall back to software decompression if the hardware
31 * decompression fails, so this driver's decompression should never fail as
32 * long as the provided compressed buffer is valid. Any compressed buffer
33 * created by this driver will have a header (except ones where the input
34 * perfectly matches the constraints); so users of this driver cannot simply
35 * pass a compressed buffer created by this driver over to the 842 software
36 * decompression library. Instead, users must use this driver to decompress;
37 * if the hardware fails or is unavailable, the compressed buffer will be
38 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
39 * software decompression library.
40 *
41 * This does not fall back to software compression, however, since the caller
42 * of this function is specifically requesting hardware compression; if the
43 * hardware compression fails, the caller can fall back to software
44 * compression, and the raw 842 compressed buffer that the software compressor
45 * creates can be passed to this driver for hardware decompression; any
46 * buffer without our specific header magic is assumed to be a raw 842 buffer
47 * and passed directly to the hardware. Note that the software compression
48 * library will produce a compressed buffer that is incompatible with the
49 * hardware decompressor if the original input buffer length is not a multiple
50 * of 8; if such a compressed buffer is passed to this driver for
51 * decompression, the hardware will reject it and this driver will then pass
52 * it over to the software library for decompression.
53 */
54
55#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
56
57#include <linux/init.h>
58#include <linux/module.h>
59#include <linux/crypto.h>
60#include <linux/vmalloc.h>
61#include <linux/nx842.h>
62#include <linux/sw842.h>
63#include <linux/ratelimit.h>
64
/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
 * template (see lib/842/842.h), so this magic number will never appear at
 * the start of a raw 842 compressed buffer.  That is important, as any buffer
 * passed to us without this magic is assumed to be a raw 842 compressed
 * buffer, and passed directly to the hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)
/* largest number of groups a single header may describe */
#define NX842_CRYPTO_GROUP_MAX	(0x20)
/* size in bytes of a header that describes g groups */
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 sizeof(struct nx842_crypto_header_group) * (g))
/* size in bytes of a header that describes NX842_CRYPTO_GROUP_MAX groups */
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
78
/* bounce buffer size: each bounce buffer is (1 << BOUNCE_BUFFER_ORDER) pages,
 * allocated with __get_free_pages() in nx842_crypto_init()
 */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))

/* try longer on comp because we can fallback to sw decomp if hw is busy */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
87
/* Describes one compressed group inside a buffer produced by this driver.
 * All fields are stored big-endian and the struct is packed, because it is
 * copied byte-for-byte into and out of the compressed buffer.
 */
struct nx842_crypto_header_group {
	__be16 padding;			/* unused bytes at start of group */
	__be32 compressed_length;	/* compressed bytes in group */
	__be32 uncompressed_length;	/* bytes after decompression */
} __packed;
93
/* Header written at the very start of a compressed buffer by this driver.
 * It is followed by 'groups' nx842_crypto_header_group entries; use
 * NX842_CRYPTO_HEADER_SIZE(groups) to get the full header length.
 */
struct nx842_crypto_header {
	__be16 magic;		/* NX842_CRYPTO_MAGIC */
	__be16 ignore;		/* decompressed end bytes to ignore */
	u8 groups;		/* total groups in this header */
	struct nx842_crypto_header_group group[];
} __packed;
100
/* Running cursor over the caller's input and output buffers; advanced by
 * update_param() after each group is processed.
 */
struct nx842_crypto_param {
	u8 *in;			/* next unread input byte */
	unsigned int iremain;	/* input bytes not yet consumed */
	u8 *out;		/* next unwritten output byte */
	unsigned int oremain;	/* output bytes still available */
	unsigned int ototal;	/* output bytes produced so far */
};
108
109static int update_param(struct nx842_crypto_param *p,
110 unsigned int slen, unsigned int dlen)
111{
112 if (p->iremain < slen)
113 return -EOVERFLOW;
114 if (p->oremain < dlen)
115 return -ENOSPC;
116
117 p->in += slen;
118 p->iremain -= slen;
119 p->out += dlen;
120 p->oremain -= dlen;
121 p->ototal += dlen;
122
123 return 0;
124}
125
126struct nx842_crypto_ctx {
127 u8 *wmem;
128 u8 *sbounce, *dbounce;
129
130 struct nx842_crypto_header header;
131 struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX];
132};
133
134static int nx842_crypto_init(struct crypto_tfm *tfm)
135{
136 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
137
138 ctx->wmem = kmalloc(NX842_MEM_COMPRESS, GFP_KERNEL);
139 ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
140 ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
141 if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
142 kfree(ctx->wmem);
143 free_page((unsigned long)ctx->sbounce);
144 free_page((unsigned long)ctx->dbounce);
145 return -ENOMEM;
146 }
147
148 return 0;
149}
150
151static void nx842_crypto_exit(struct crypto_tfm *tfm)
152{
153 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
154
155 kfree(ctx->wmem);
156 free_page((unsigned long)ctx->sbounce);
157 free_page((unsigned long)ctx->dbounce);
158}
159
160static int read_constraints(struct nx842_constraints *c)
161{
162 int ret;
163
164 ret = nx842_constraints(c);
165 if (ret) {
166 pr_err_ratelimited("could not get nx842 constraints : %d\n",
167 ret);
168 return ret;
169 }
170
171 /* limit maximum, to always have enough bounce buffer to decompress */
172 if (c->maximum > BOUNCE_BUFFER_SIZE) {
173 c->maximum = BOUNCE_BUFFER_SIZE;
174 pr_info_once("limiting nx842 maximum to %x\n", c->maximum);
175 }
176
177 return 0;
178}
179
180static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
181{
182 int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
183
184 /* compress should have added space for header */
185 if (s > be16_to_cpu(hdr->group[0].padding)) {
186 pr_err("Internal error: no space for header\n");
187 return -EINVAL;
188 }
189
190 memcpy(buf, hdr, s);
191
192 print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
193
194 return 0;
195}
196
197static int compress(struct nx842_crypto_ctx *ctx,
198 struct nx842_crypto_param *p,
199 struct nx842_crypto_header_group *g,
200 struct nx842_constraints *c,
201 u16 *ignore,
202 unsigned int hdrsize)
203{
204 unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
205 unsigned int adj_slen = slen;
206 u8 *src = p->in, *dst = p->out;
207 int ret, dskip = 0;
208 ktime_t timeout;
209
210 if (p->iremain == 0)
211 return -EOVERFLOW;
212
213 if (p->oremain == 0 || hdrsize + c->minimum > dlen)
214 return -ENOSPC;
215
216 if (slen % c->multiple)
217 adj_slen = round_up(slen, c->multiple);
218 if (slen < c->minimum)
219 adj_slen = c->minimum;
220 if (slen > c->maximum)
221 adj_slen = slen = c->maximum;
222 if (adj_slen > slen || (u64)src % c->alignment) {
223 adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
224 slen = min(slen, BOUNCE_BUFFER_SIZE);
225 if (adj_slen > slen)
226 memset(ctx->sbounce + slen, 0, adj_slen - slen);
227 memcpy(ctx->sbounce, src, slen);
228 src = ctx->sbounce;
229 slen = adj_slen;
230 pr_debug("using comp sbounce buffer, len %x\n", slen);
231 }
232
233 dst += hdrsize;
234 dlen -= hdrsize;
235
236 if ((u64)dst % c->alignment) {
237 dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
238 dst += dskip;
239 dlen -= dskip;
240 }
241 if (dlen % c->multiple)
242 dlen = round_down(dlen, c->multiple);
243 if (dlen < c->minimum) {
244nospc:
245 dst = ctx->dbounce;
246 dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
247 dlen = round_down(dlen, c->multiple);
248 dskip = 0;
249 pr_debug("using comp dbounce buffer, len %x\n", dlen);
250 }
251 if (dlen > c->maximum)
252 dlen = c->maximum;
253
254 tmplen = dlen;
255 timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
256 do {
257 dlen = tmplen; /* reset dlen, if we're retrying */
258 ret = nx842_compress(src, slen, dst, &dlen, ctx->wmem);
259 /* possibly we should reduce the slen here, instead of
260 * retrying with the dbounce buffer?
261 */
262 if (ret == -ENOSPC && dst != ctx->dbounce)
263 goto nospc;
264 } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
265 if (ret)
266 return ret;
267
268 dskip += hdrsize;
269
270 if (dst == ctx->dbounce)
271 memcpy(p->out + dskip, dst, dlen);
272
273 g->padding = cpu_to_be16(dskip);
274 g->compressed_length = cpu_to_be32(dlen);
275 g->uncompressed_length = cpu_to_be32(slen);
276
277 if (p->iremain < slen) {
278 *ignore = slen - p->iremain;
279 slen = p->iremain;
280 }
281
282 pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
283 slen, *ignore, dlen, dskip);
284
285 return update_param(p, slen, dskip + dlen);
286}
287
288static int nx842_crypto_compress(struct crypto_tfm *tfm,
289 const u8 *src, unsigned int slen,
290 u8 *dst, unsigned int *dlen)
291{
292 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
293 struct nx842_crypto_header *hdr = &ctx->header;
294 struct nx842_crypto_param p;
295 struct nx842_constraints c;
296 unsigned int groups, hdrsize, h;
297 int ret, n;
298 bool add_header;
299 u16 ignore = 0;
300
Dan Streetmaned70b472015-05-07 13:49:21 -0400301 p.in = (u8 *)src;
302 p.iremain = slen;
303 p.out = dst;
304 p.oremain = *dlen;
305 p.ototal = 0;
306
307 *dlen = 0;
308
309 ret = read_constraints(&c);
310 if (ret)
311 return ret;
312
313 groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
314 DIV_ROUND_UP(p.iremain, c.maximum));
315 hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
316
317 /* skip adding header if the buffers meet all constraints */
318 add_header = (p.iremain % c.multiple ||
319 p.iremain < c.minimum ||
320 p.iremain > c.maximum ||
321 (u64)p.in % c.alignment ||
322 p.oremain % c.multiple ||
323 p.oremain < c.minimum ||
324 p.oremain > c.maximum ||
325 (u64)p.out % c.alignment);
326
327 hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
328 hdr->groups = 0;
329 hdr->ignore = 0;
330
331 while (p.iremain > 0) {
332 n = hdr->groups++;
333 if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
334 return -ENOSPC;
335
336 /* header goes before first group */
337 h = !n && add_header ? hdrsize : 0;
338
339 if (ignore)
340 pr_warn("interal error, ignore is set %x\n", ignore);
341
342 ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
343 if (ret)
344 return ret;
345 }
346
347 if (!add_header && hdr->groups > 1) {
348 pr_err("Internal error: No header but multiple groups\n");
349 return -EINVAL;
350 }
351
352 /* ignore indicates the input stream needed to be padded */
353 hdr->ignore = cpu_to_be16(ignore);
354 if (ignore)
355 pr_debug("marked %d bytes as ignore\n", ignore);
356
357 if (add_header)
358 ret = nx842_crypto_add_header(hdr, dst);
359 if (ret)
360 return ret;
361
362 *dlen = p.ototal;
363
364 pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
365
366 return 0;
367}
368
369static int decompress(struct nx842_crypto_ctx *ctx,
370 struct nx842_crypto_param *p,
371 struct nx842_crypto_header_group *g,
372 struct nx842_constraints *c,
373 u16 ignore,
374 bool usehw)
375{
376 unsigned int slen = be32_to_cpu(g->compressed_length);
377 unsigned int required_len = be32_to_cpu(g->uncompressed_length);
378 unsigned int dlen = p->oremain, tmplen;
379 unsigned int adj_slen = slen;
380 u8 *src = p->in, *dst = p->out;
381 u16 padding = be16_to_cpu(g->padding);
382 int ret, spadding = 0, dpadding = 0;
383 ktime_t timeout;
384
385 if (!slen || !required_len)
386 return -EINVAL;
387
388 if (p->iremain <= 0 || padding + slen > p->iremain)
389 return -EOVERFLOW;
390
391 if (p->oremain <= 0 || required_len - ignore > p->oremain)
392 return -ENOSPC;
393
394 src += padding;
395
396 if (!usehw)
397 goto usesw;
398
399 if (slen % c->multiple)
400 adj_slen = round_up(slen, c->multiple);
401 if (slen < c->minimum)
402 adj_slen = c->minimum;
403 if (slen > c->maximum)
404 goto usesw;
405 if (slen < adj_slen || (u64)src % c->alignment) {
406 /* we can append padding bytes because the 842 format defines
407 * an "end" template (see lib/842/842_decompress.c) and will
408 * ignore any bytes following it.
409 */
410 if (slen < adj_slen)
411 memset(ctx->sbounce + slen, 0, adj_slen - slen);
412 memcpy(ctx->sbounce, src, slen);
413 src = ctx->sbounce;
414 spadding = adj_slen - slen;
415 slen = adj_slen;
416 pr_debug("using decomp sbounce buffer, len %x\n", slen);
417 }
418
419 if (dlen % c->multiple)
420 dlen = round_down(dlen, c->multiple);
421 if (dlen < required_len || (u64)dst % c->alignment) {
422 dst = ctx->dbounce;
423 dlen = min(required_len, BOUNCE_BUFFER_SIZE);
424 pr_debug("using decomp dbounce buffer, len %x\n", dlen);
425 }
426 if (dlen < c->minimum)
427 goto usesw;
428 if (dlen > c->maximum)
429 dlen = c->maximum;
430
431 tmplen = dlen;
432 timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
433 do {
434 dlen = tmplen; /* reset dlen, if we're retrying */
435 ret = nx842_decompress(src, slen, dst, &dlen, ctx->wmem);
436 } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
437 if (ret) {
438usesw:
439 /* reset everything, sw doesn't have constraints */
440 src = p->in + padding;
441 slen = be32_to_cpu(g->compressed_length);
442 spadding = 0;
443 dst = p->out;
444 dlen = p->oremain;
445 dpadding = 0;
446 if (dlen < required_len) { /* have ignore bytes */
447 dst = ctx->dbounce;
448 dlen = BOUNCE_BUFFER_SIZE;
449 }
450 pr_info_ratelimited("using software 842 decompression\n");
451 ret = sw842_decompress(src, slen, dst, &dlen);
452 }
453 if (ret)
454 return ret;
455
456 slen -= spadding;
457
458 dlen -= ignore;
459 if (ignore)
460 pr_debug("ignoring last %x bytes\n", ignore);
461
462 if (dst == ctx->dbounce)
463 memcpy(p->out, dst, dlen);
464
465 pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
466 slen, padding, dlen, ignore);
467
468 return update_param(p, slen + padding, dlen);
469}
470
471static int nx842_crypto_decompress(struct crypto_tfm *tfm,
472 const u8 *src, unsigned int slen,
473 u8 *dst, unsigned int *dlen)
474{
475 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
476 struct nx842_crypto_header *hdr;
477 struct nx842_crypto_param p;
478 struct nx842_constraints c;
479 int n, ret, hdr_len;
480 u16 ignore = 0;
481 bool usehw = true;
482
Dan Streetmaned70b472015-05-07 13:49:21 -0400483 p.in = (u8 *)src;
484 p.iremain = slen;
485 p.out = dst;
486 p.oremain = *dlen;
487 p.ototal = 0;
488
489 *dlen = 0;
490
491 if (read_constraints(&c))
492 usehw = false;
493
494 hdr = (struct nx842_crypto_header *)src;
495
496 /* If it doesn't start with our header magic number, assume it's a raw
497 * 842 compressed buffer and pass it directly to the hardware driver
498 */
499 if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
500 struct nx842_crypto_header_group g = {
501 .padding = 0,
502 .compressed_length = cpu_to_be32(p.iremain),
503 .uncompressed_length = cpu_to_be32(p.oremain),
504 };
505
506 ret = decompress(ctx, &p, &g, &c, 0, usehw);
507 if (ret)
508 return ret;
509
510 *dlen = p.ototal;
511
512 return 0;
513 }
514
515 if (!hdr->groups) {
516 pr_err("header has no groups\n");
517 return -EINVAL;
518 }
519 if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
520 pr_err("header has too many groups %x, max %x\n",
521 hdr->groups, NX842_CRYPTO_GROUP_MAX);
522 return -EINVAL;
523 }
524
525 hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
526 if (hdr_len > slen)
527 return -EOVERFLOW;
528
529 memcpy(&ctx->header, src, hdr_len);
530 hdr = &ctx->header;
531
532 for (n = 0; n < hdr->groups; n++) {
533 /* ignore applies to last group */
534 if (n + 1 == hdr->groups)
535 ignore = be16_to_cpu(hdr->ignore);
536
537 ret = decompress(ctx, &p, &hdr->group[n], &c, ignore, usehw);
538 if (ret)
539 return ret;
540 }
541
542 *dlen = p.ototal;
543
544 pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
545
546 return 0;
547}
548
/* Compression algorithm descriptor registered with the crypto API by
 * nx842_crypto_mod_init(): algorithm name "842", driver name "842-nx".
 */
static struct crypto_alg alg = {
	.cra_name		= "842",
	.cra_driver_name	= "842-nx",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	/* the crypto core allocates this much per-transform context */
	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= nx842_crypto_init,
	.cra_exit		= nx842_crypto_exit,
	.cra_u			= { .compress = {
	.coa_compress		= nx842_crypto_compress,
	.coa_decompress		= nx842_crypto_decompress } }
};
562
/* Module entry point: register the "842-nx" algorithm with the crypto API.
 * Returns the result of crypto_register_alg().
 */
static int __init nx842_crypto_mod_init(void)
{
	return crypto_register_alg(&alg);
}
module_init(nx842_crypto_mod_init);
568
/* Module exit point: unregister the algorithm registered at module init. */
static void __exit nx842_crypto_mod_exit(void)
{
	crypto_unregister_alg(&alg);
}
module_exit(nx842_crypto_mod_exit);
574
575MODULE_LICENSE("GPL");
576MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Interface");
577MODULE_ALIAS_CRYPTO("842");
578MODULE_ALIAS_CRYPTO("842-nx");
579MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");