crypto: lzo - try kmalloc() before vmalloc()

zswap allocates one LZO context per online cpu.

Using vmalloc() for small (16KB) memory areas has drawbacks: it slows
down /proc/vmallocinfo and /proc/meminfo reads, adds TLB pressure, and
gives poor NUMA locality, since the default NUMA policy at boot time is
to interleave pages:

edumazet:~# grep lzo /proc/vmallocinfo | head -4
0xffffc90006062000-0xffffc90006067000   20480 lzo_init+0x1b/0x30 pages=4 vmalloc N0=2 N1=2
0xffffc90006067000-0xffffc9000606c000   20480 lzo_init+0x1b/0x30 pages=4 vmalloc N0=2 N1=2
0xffffc9000606c000-0xffffc90006071000   20480 lzo_init+0x1b/0x30 pages=4 vmalloc N0=2 N1=2
0xffffc90006071000-0xffffc90006076000   20480 lzo_init+0x1b/0x30 pages=4 vmalloc N0=2 N1=2

This patch tries a regular kmalloc() first and falls back to vmalloc()
in case memory is too fragmented.
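
As a minimal sketch of the same allocation pattern outside the crypto
code (the helper names alloc_workmem/free_workmem are illustrative and
not part of this patch; the actual change is confined to lzo_init() and
lzo_exit() below):

	#include <linux/slab.h>
	#include <linux/vmalloc.h>
	#include <linux/mm.h>

	static void *alloc_workmem(size_t size)
	{
		/* Try physically contiguous memory first, without
		 * warning on failure.
		 */
		void *mem = kmalloc(size, GFP_KERNEL | __GFP_NOWARN |
					  __GFP_REPEAT);

		/* Fall back to vmalloc() if memory is too fragmented. */
		if (!mem)
			mem = vmalloc(size);
		return mem;
	}

	static void free_workmem(void *mem)
	{
		/* is_vmalloc_addr() tells us which allocator
		 * provided the buffer, so we can release it with the
		 * matching free routine.
		 */
		if (is_vmalloc_addr(mem))
			vfree(mem);
		else
			kfree(mem);
	}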

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/lzo.c b/crypto/lzo.c
index 1c2aa69..252e791d 100644
--- a/crypto/lzo.c
+++ b/crypto/lzo.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/lzo.h>
 
 struct lzo_ctx {
@@ -30,7 +31,10 @@
 {
 	struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS);
+	ctx->lzo_comp_mem = kmalloc(LZO1X_MEM_COMPRESS,
+				    GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!ctx->lzo_comp_mem)
+		ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS);
 	if (!ctx->lzo_comp_mem)
 		return -ENOMEM;
 
@@ -41,7 +45,10 @@
 {
 	struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	vfree(ctx->lzo_comp_mem);
+	if (is_vmalloc_addr(ctx->lzo_comp_mem))
+		vfree(ctx->lzo_comp_mem);
+	else
+		kfree(ctx->lzo_comp_mem);
 }
 
 static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,