From 2407d60872dd2a95404c6048f775f3b64d438f4b Mon Sep 17 00:00:00 2001 From: Tan Swee Heng Date: Fri, 23 Nov 2007 19:45:00 +0800 Subject: [CRYPTO] salsa20: Salsa20 stream cipher This patch implements the Salsa20 stream cipher using the blkcipher interface. The core cipher code comes from Daniel Bernstein's submission to eSTREAM: http://www.ecrypt.eu.org/stream/svn/viewcvs.cgi/ecrypt/trunk/submissions/salsa20/full/ref/ The test vectors comes from: http://www.ecrypt.eu.org/stream/svn/viewcvs.cgi/ecrypt/trunk/submissions/salsa20/full/ It has been tested successfully with "modprobe tcrypt mode=34" on an UML instance. Signed-off-by: Tan Swee Heng Signed-off-by: Herbert Xu --- crypto/salsa20_generic.c | 243 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 crypto/salsa20_generic.c (limited to 'crypto/salsa20_generic.c') diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c new file mode 100644 index 00000000000..b49328afcf0 --- /dev/null +++ b/crypto/salsa20_generic.c @@ -0,0 +1,243 @@ +/* + * Salsa20: Salsa20 stream cipher algorithm + * + * Copyright (c) 2007 Tan Swee Heng + * + * Derived from: + * - salsa20.c: Public domain C code by Daniel J. Bernstein + * + * Salsa20 is a stream cipher candidate in eSTREAM, the ECRYPT Stream + * Cipher Project. It is designed by Daniel J. Bernstein . + * More information about eSTREAM and Salsa20 can be found here: + * http://www.ecrypt.eu.org/stream/ + * http://cr.yp.to/snuffle.html + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define SALSA20_IV_SIZE 8U +#define SALSA20_MIN_KEY_SIZE 16U +#define SALSA20_MAX_KEY_SIZE 32U + +/* + * Start of code taken from D. J. Bernstein's reference implementation. + * With some modifications and optimizations made to suit our needs. + */ + +/* +salsa20-ref.c version 20051118 +D. J. Bernstein +Public domain. +*/ + +#define ROTATE(v,n) (((v) << (n)) | ((v) >> (32 - (n)))) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) +#define U32TO8_LITTLE(p, v) \ + { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ + (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24) ) + +struct salsa20_ctx +{ + u32 input[16]; +}; + +static void salsa20_wordtobyte(u8 output[64], const u32 input[16]) +{ + u32 x[16]; + int i; + + memcpy(x, input, sizeof(x)); + for (i = 20; i > 0; i -= 2) { + x[ 4] = XOR(x[ 4],ROTATE(PLUS(x[ 0],x[12]), 7)); + x[ 8] = XOR(x[ 8],ROTATE(PLUS(x[ 4],x[ 0]), 9)); + x[12] = XOR(x[12],ROTATE(PLUS(x[ 8],x[ 4]),13)); + x[ 0] = XOR(x[ 0],ROTATE(PLUS(x[12],x[ 8]),18)); + x[ 9] = XOR(x[ 9],ROTATE(PLUS(x[ 5],x[ 1]), 7)); + x[13] = XOR(x[13],ROTATE(PLUS(x[ 9],x[ 5]), 9)); + x[ 1] = XOR(x[ 1],ROTATE(PLUS(x[13],x[ 9]),13)); + x[ 5] = XOR(x[ 5],ROTATE(PLUS(x[ 1],x[13]),18)); + x[14] = XOR(x[14],ROTATE(PLUS(x[10],x[ 6]), 7)); + x[ 2] = XOR(x[ 2],ROTATE(PLUS(x[14],x[10]), 9)); + x[ 6] = XOR(x[ 6],ROTATE(PLUS(x[ 2],x[14]),13)); + x[10] = XOR(x[10],ROTATE(PLUS(x[ 6],x[ 2]),18)); + x[ 3] = XOR(x[ 3],ROTATE(PLUS(x[15],x[11]), 7)); + x[ 7] = XOR(x[ 7],ROTATE(PLUS(x[ 3],x[15]), 9)); + x[11] = XOR(x[11],ROTATE(PLUS(x[ 7],x[ 3]),13)); + x[15] = XOR(x[15],ROTATE(PLUS(x[11],x[ 7]),18)); + x[ 1] = XOR(x[ 1],ROTATE(PLUS(x[ 0],x[ 3]), 7)); + x[ 2] = XOR(x[ 2],ROTATE(PLUS(x[ 1],x[ 0]), 9)); + x[ 3] = XOR(x[ 3],ROTATE(PLUS(x[ 2],x[ 1]),13)); + x[ 0] = XOR(x[ 0],ROTATE(PLUS(x[ 3],x[ 2]),18)); + x[ 6] = XOR(x[ 6],ROTATE(PLUS(x[ 5],x[ 4]), 7)); + x[ 7] = XOR(x[ 7],ROTATE(PLUS(x[ 6],x[ 5]), 9)); + x[ 4] = XOR(x[ 4],ROTATE(PLUS(x[ 7],x[ 6]),13)); + x[ 5] = XOR(x[ 5],ROTATE(PLUS(x[ 4],x[ 7]),18)); + x[11] = XOR(x[11],ROTATE(PLUS(x[10],x[ 9]), 7)); + x[ 8] = XOR(x[ 8],ROTATE(PLUS(x[11],x[10]), 9)); + x[ 9] = XOR(x[ 9],ROTATE(PLUS(x[ 8],x[11]),13)); + x[10] = XOR(x[10],ROTATE(PLUS(x[ 9],x[ 8]),18)); + x[12] = XOR(x[12],ROTATE(PLUS(x[15],x[14]), 7)); + x[13] = XOR(x[13],ROTATE(PLUS(x[12],x[15]), 9)); + x[14] = XOR(x[14],ROTATE(PLUS(x[13],x[12]),13)); + x[15] = XOR(x[15],ROTATE(PLUS(x[14],x[13]),18)); + } + for (i = 0; i < 16; ++i) + x[i] = PLUS(x[i],input[i]); + for (i = 0; i < 16; ++i) + U32TO8_LITTLE(output + 4 * i,x[i]); +} + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, u32 kbytes) +{ + const char *constants; + + ctx->input[1] = U8TO32_LITTLE(k + 0); + ctx->input[2] = U8TO32_LITTLE(k + 4); + ctx->input[3] = U8TO32_LITTLE(k + 8); + ctx->input[4] = U8TO32_LITTLE(k + 12); + if (kbytes == 32) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbytes == 16 */ + constants = tau; + } + ctx->input[11] = U8TO32_LITTLE(k + 0); + ctx->input[12] = U8TO32_LITTLE(k + 4); + ctx->input[13] = U8TO32_LITTLE(k + 8); + ctx->input[14] = U8TO32_LITTLE(k + 12); + ctx->input[0] = U8TO32_LITTLE(constants + 0); + ctx->input[5] = U8TO32_LITTLE(constants + 4); + ctx->input[10] = U8TO32_LITTLE(constants + 8); + ctx->input[15] = U8TO32_LITTLE(constants + 12); +} + +static void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv) +{ + ctx->input[6] = U8TO32_LITTLE(iv + 0); + ctx->input[7] = U8TO32_LITTLE(iv + 4); + ctx->input[8] = 0; + ctx->input[9] = 0; +} + +static void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, u8 *dst, + const u8 *src, unsigned int bytes) +{ + u8 buf[64]; + int i; + + if (dst != src) + memcpy(dst, src, bytes); + + while (bytes) { + salsa20_wordtobyte(buf, ctx->input); + + ctx->input[8] = PLUSONE(ctx->input[8]); + if (!ctx->input[8]) + ctx->input[9] = PLUSONE(ctx->input[9]); + + if (bytes <= 64) { + for (i = 0; i < bytes/4; ++i) + ((u32*)dst)[i] ^= ((u32*)buf)[i]; + for (i = bytes - bytes % 4; i < bytes; ++i) + dst[i] ^= buf[i]; + return; + } + + for (i = 0; i < 64/4; ++i) + ((u32*)dst)[i] ^= ((u32*)buf)[i]; + bytes -= 64; + dst += 64; + } +} + +/* + * End of code taken from D. J. Bernstein's reference implementation. + */ + +static int setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keysize) +{ + struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); + salsa20_keysetup(ctx, key, keysize); + return 0; +} + +static int encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + salsa20_ivsetup(ctx, walk.iv); + salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, + walk.src.virt.addr, nbytes); + + err = blkcipher_walk_done(desc, &walk, 0); + return err; +} + +static struct crypto_alg alg = { + .cra_name = "salsa20", + .cra_driver_name = "salsa20-generic", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_type = &crypto_blkcipher_type, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct salsa20_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .blkcipher = { + .setkey = setkey, + .encrypt = encrypt, + .decrypt = encrypt, + .min_keysize = SALSA20_MIN_KEY_SIZE, + .max_keysize = SALSA20_MAX_KEY_SIZE, + .ivsize = SALSA20_IV_SIZE, + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm"); +MODULE_ALIAS("salsa20"); -- cgit From eb6f13eb9f812f5812ed5d14f241309da369dee6 Mon Sep 17 00:00:00 2001 From: Tan Swee Heng Date: Fri, 7 Dec 2007 16:38:45 +0800 Subject: [CRYPTO] salsa20_generic: Fix multi-page processing This patch fixes the multi-page processing bug that affects large test vectors (the same bug that previously affected ctr.c). There is an optimization for the case walk.nbytes == nbytes. Also we now use crypto_xor() instead of adhoc XOR routines. Signed-off-by: Tan Swee Heng Signed-off-by: Herbert Xu --- crypto/salsa20_generic.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'crypto/salsa20_generic.c') diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index b49328afcf0..1fa4e4ddcab 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -143,7 +143,6 @@ static void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, u8 *dst, const u8 *src, unsigned int bytes) { u8 buf[64]; - int i; if (dst != src) memcpy(dst, src, bytes); @@ -156,15 +155,11 @@ static void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, u8 *dst, ctx->input[9] = PLUSONE(ctx->input[9]); if (bytes <= 64) { - for (i = 0; i < bytes/4; ++i) - ((u32*)dst)[i] ^= ((u32*)buf)[i]; - for (i = bytes - bytes % 4; i < bytes; ++i) - dst[i] ^= buf[i]; + crypto_xor(dst, buf, bytes); return; } - for (i = 0; i < 64/4; ++i) - ((u32*)dst)[i] ^= ((u32*)buf)[i]; + crypto_xor(dst, buf, 64); bytes -= 64; dst += 64; } @@ -192,13 +187,30 @@ static int encrypt(struct blkcipher_desc *desc, int err; blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = blkcipher_walk_virt_block(desc, &walk, 64); salsa20_ivsetup(ctx, walk.iv); - salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, - walk.src.virt.addr, nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + if (likely(walk.nbytes == nbytes)) + { + salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, + walk.src.virt.addr, nbytes); + return blkcipher_walk_done(desc, &walk, 0); + } + + while (walk.nbytes >= 64) { + salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes - (walk.nbytes % 64)); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); + } + + if (walk.nbytes) { + salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; } -- cgit