[master] 1f631aa Update our vsha256 with the latest improvements from Colin Percival:
Poul-Henning Kamp
phk at FreeBSD.org
Sat May 6 00:52:05 CEST 2017
commit 1f631aa61dd170aa3bdd6485fbdb801c7cc36eb0
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date: Fri May 5 22:50:03 2017 +0000
Update our vsha256 with the latest improvements from Colin Percival:
Retune SHA2 code for improved performance on CPUs with more ILP and
a preference for memory load instructions over large code footprints
with embedded immediate variables.
On amd64 CPUs from 2007-2008 there is not a significant change, but
amd64 CPUs from 2009-2010 get roughly 10% more throughput with this
code; amd64 CPUs from 2011-2012 get roughly 15% more throughput; and
amd64 CPUs from 2013-2015 get 20-25% more throughput. The Raspberry
Pi 2 increases its throughput by 6-8%.
Sponsored by: Tarsnap Backup Inc.
diff --git a/include/vsha256.h b/include/vsha256.h
index bf42c11..e3852e9 100644
--- a/include/vsha256.h
+++ b/include/vsha256.h
@@ -30,6 +30,7 @@
#define _SHA256_H_
#define SHA256_LEN 32
+#define SHA256_DIGEST_LENGTH 32
typedef struct SHA256Context {
uint32_t state[8];
diff --git a/lib/libvarnish/flint.lnt b/lib/libvarnish/flint.lnt
index 0a014ba..636828a 100644
--- a/lib/libvarnish/flint.lnt
+++ b/lib/libvarnish/flint.lnt
@@ -1,5 +1,10 @@
-efunc(835, SHA256_Transform) // A zero has been given as ___ argument to operator '___'
-efunc(778, SHA256_Transform) // Constant expression evaluates to 0 in operation '___'
+-efunc(661, SHA256_Transform) // Possible access of out-of-bounds pointer
+-efunc(662, SHA256_Transform) // Possible creation of out-of-bounds pointer
+-efunc(662, SHA256_Update) // Possible creation of out-of-bounds pointer
+-efunc(662, be32dec_vect) // Possible creation of out-of-bounds pointer
+-efunc(670, SHA256_Update) // Possible access beyond array for function '___',
+fan // Allow anon unions
-e713 // Loss of precision (assignment) (unsigned long long to long long)
@@ -7,3 +12,4 @@
-dVARNISH_STATE_DIR="foo"
--emacro((835),BINHEAP_NOIDX)
+--emacro((835),O_CLOEXEC)
diff --git a/lib/libvarnish/vsha256.c b/lib/libvarnish/vsha256.c
index 7e73710..c668474 100644
--- a/lib/libvarnish/vsha256.c
+++ b/lib/libvarnish/vsha256.c
@@ -23,7 +23,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * From: $FreeBSD: head/lib/libmd/sha256c.c 154479 2006-01-17 15:35:57Z phk $
+ * From:
+ * $FreeBSD: sys/crypto/sha2/sha256c.c 300966 2016-05-29 17:26:40Z cperciva $
*/
#include "config.h"
@@ -84,7 +85,27 @@ be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
dst[i] = vbe32dec(src + i * 4);
}
-#endif
+#endif /* BYTE_ORDER != BIG_ENDIAN */
+
+/* SHA256 round constants. */
+static const uint32_t K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
@@ -97,19 +118,28 @@ be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
/* SHA256 round function */
-#define RND(a, b, c, d, e, f, g, h, k) \
- t0 = h + S1(e) + Ch(e, f, g) + k; \
- t1 = S0(a) + Maj(a, b, c); \
- d += t0; \
- h = t0 + t1;
+#define RND(a, b, c, d, e, f, g, h, k) \
+ do { \
+ h += S1(e) + Ch(e, f, g) + (k); \
+ d += h; \
+ h += S0(a) + Maj(a, b, c); \
+ } while (0)
/* Adjusted round function for rotating state */
-#define RNDr(S, W, i, k) \
+#define RNDr(S, W, i, ii) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
- (W[i] + k))
+ W[i + ii] + K[i + ii])
+
+/* Message schedule computation */
+#define MSCH(W, ii, i) \
+ do { \
+ W[i + ii + 16] = \
+ s1(W[i + ii + 14]) + W[i + ii + 9] + \
+ s0(W[i + ii + 1]) + W[i + ii]; \
+ } while (0)
/*
* SHA256 block compression function. The 256-bit state is transformed via
@@ -120,82 +150,52 @@ SHA256_Transform(uint32_t * state, const unsigned char block[64])
{
uint32_t W[64];
uint32_t S[8];
- uint32_t t0, t1;
int i;
- /* 1. Prepare message schedule W. */
+ /* 1. Prepare the first part of the message schedule W. */
be32dec_vect(W, block, 64);
- for (i = 16; i < 64; i++)
- W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
/* 2. Initialize working variables. */
memcpy(S, state, 32);
/* 3. Mix. */
- RNDr(S, W, 0, 0x428a2f98);
- RNDr(S, W, 1, 0x71374491);
- RNDr(S, W, 2, 0xb5c0fbcf);
- RNDr(S, W, 3, 0xe9b5dba5);
- RNDr(S, W, 4, 0x3956c25b);
- RNDr(S, W, 5, 0x59f111f1);
- RNDr(S, W, 6, 0x923f82a4);
- RNDr(S, W, 7, 0xab1c5ed5);
- RNDr(S, W, 8, 0xd807aa98);
- RNDr(S, W, 9, 0x12835b01);
- RNDr(S, W, 10, 0x243185be);
- RNDr(S, W, 11, 0x550c7dc3);
- RNDr(S, W, 12, 0x72be5d74);
- RNDr(S, W, 13, 0x80deb1fe);
- RNDr(S, W, 14, 0x9bdc06a7);
- RNDr(S, W, 15, 0xc19bf174);
- RNDr(S, W, 16, 0xe49b69c1);
- RNDr(S, W, 17, 0xefbe4786);
- RNDr(S, W, 18, 0x0fc19dc6);
- RNDr(S, W, 19, 0x240ca1cc);
- RNDr(S, W, 20, 0x2de92c6f);
- RNDr(S, W, 21, 0x4a7484aa);
- RNDr(S, W, 22, 0x5cb0a9dc);
- RNDr(S, W, 23, 0x76f988da);
- RNDr(S, W, 24, 0x983e5152);
- RNDr(S, W, 25, 0xa831c66d);
- RNDr(S, W, 26, 0xb00327c8);
- RNDr(S, W, 27, 0xbf597fc7);
- RNDr(S, W, 28, 0xc6e00bf3);
- RNDr(S, W, 29, 0xd5a79147);
- RNDr(S, W, 30, 0x06ca6351);
- RNDr(S, W, 31, 0x14292967);
- RNDr(S, W, 32, 0x27b70a85);
- RNDr(S, W, 33, 0x2e1b2138);
- RNDr(S, W, 34, 0x4d2c6dfc);
- RNDr(S, W, 35, 0x53380d13);
- RNDr(S, W, 36, 0x650a7354);
- RNDr(S, W, 37, 0x766a0abb);
- RNDr(S, W, 38, 0x81c2c92e);
- RNDr(S, W, 39, 0x92722c85);
- RNDr(S, W, 40, 0xa2bfe8a1);
- RNDr(S, W, 41, 0xa81a664b);
- RNDr(S, W, 42, 0xc24b8b70);
- RNDr(S, W, 43, 0xc76c51a3);
- RNDr(S, W, 44, 0xd192e819);
- RNDr(S, W, 45, 0xd6990624);
- RNDr(S, W, 46, 0xf40e3585);
- RNDr(S, W, 47, 0x106aa070);
- RNDr(S, W, 48, 0x19a4c116);
- RNDr(S, W, 49, 0x1e376c08);
- RNDr(S, W, 50, 0x2748774c);
- RNDr(S, W, 51, 0x34b0bcb5);
- RNDr(S, W, 52, 0x391c0cb3);
- RNDr(S, W, 53, 0x4ed8aa4a);
- RNDr(S, W, 54, 0x5b9cca4f);
- RNDr(S, W, 55, 0x682e6ff3);
- RNDr(S, W, 56, 0x748f82ee);
- RNDr(S, W, 57, 0x78a5636f);
- RNDr(S, W, 58, 0x84c87814);
- RNDr(S, W, 59, 0x8cc70208);
- RNDr(S, W, 60, 0x90befffa);
- RNDr(S, W, 61, 0xa4506ceb);
- RNDr(S, W, 62, 0xbef9a3f7);
- RNDr(S, W, 63, 0xc67178f2);
+ for (i = 0; i < 64; i += 16) {
+ RNDr(S, W, 0, i);
+ RNDr(S, W, 1, i);
+ RNDr(S, W, 2, i);
+ RNDr(S, W, 3, i);
+ RNDr(S, W, 4, i);
+ RNDr(S, W, 5, i);
+ RNDr(S, W, 6, i);
+ RNDr(S, W, 7, i);
+ RNDr(S, W, 8, i);
+ RNDr(S, W, 9, i);
+ RNDr(S, W, 10, i);
+ RNDr(S, W, 11, i);
+ RNDr(S, W, 12, i);
+ RNDr(S, W, 13, i);
+ RNDr(S, W, 14, i);
+ RNDr(S, W, 15, i);
+
+ if (i == 48)
+ break;
+ MSCH(W, 0, i);
+ MSCH(W, 1, i);
+ MSCH(W, 2, i);
+ MSCH(W, 3, i);
+ MSCH(W, 4, i);
+ MSCH(W, 5, i);
+ MSCH(W, 6, i);
+ MSCH(W, 7, i);
+ MSCH(W, 8, i);
+ MSCH(W, 9, i);
+ MSCH(W, 10, i);
+ MSCH(W, 11, i);
+ MSCH(W, 12, i);
+ MSCH(W, 13, i);
+ MSCH(W, 14, i);
+ MSCH(W, 15, i);
+ }
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
@@ -213,23 +213,29 @@ static const unsigned char PAD[64] = {
static void
SHA256_Pad(SHA256_CTX * ctx)
{
- unsigned char len[8];
- uint32_t r, plen;
-
- /*
- * Convert length to bits and encode as a vector of bytes
- * -- we do this now rather than later because the length
- * will change after we pad.
- */
- vbe64enc(len, ctx->count << 3);
-
- /* Add 1--64 bytes so that the resulting length is 56 mod 64 */
- r = ctx->count & 0x3f;
- plen = (r < 56) ? (56 - r) : (120 - r);
- SHA256_Update(ctx, PAD, (size_t)plen);
-
- /* Add the terminating bit-count */
- SHA256_Update(ctx, len, 8);
+ size_t r;
+
+ /* Figure out how many bytes we have buffered. */
+ r = (ctx->count >> 3) & 0x3f;
+
+ /* Pad to 56 mod 64, transforming if we finish a block en route. */
+ if (r < 56) {
+ /* Pad to 56 mod 64. */
+ memcpy(&ctx->buf[r], PAD, 56 - r);
+ } else {
+ /* Finish the current block and mix. */
+ memcpy(&ctx->buf[r], PAD, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+
+ /* The start of the final block is all zeroes. */
+ memset(&ctx->buf[0], 0, 56);
+ }
+
+ /* Add the terminating bit-count. */
+ be64enc(&ctx->buf[56], ctx->count);
+
+ /* Mix in the final block. */
+ SHA256_Transform(ctx->state, ctx->buf);
}
/* SHA-256 initialization. Begins a SHA-256 operation. */
@@ -255,23 +261,40 @@ SHA256_Init(SHA256_CTX * ctx)
void
SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
{
- uint32_t r, l;
+ uint64_t bitlen;
+ uint32_t r;
const unsigned char *src = in;
/* Number of bytes left in the buffer from previous updates */
- r = ctx->count & 0x3f;
- while (len > 0) {
- l = 64 - r;
- if (l > len)
- l = len;
- memcpy(&ctx->buf[r], src, l);
- len -= l;
- src += l;
- ctx->count += l;
- r = ctx->count & 0x3f;
- if (r == 0)
- SHA256_Transform(ctx->state, ctx->buf);
+ r = (ctx->count >> 3) & 0x3f;
+
+ /* Convert the length into a number of bits */
+ bitlen = len << 3;
+
+ /* Update number of bits */
+ ctx->count += bitlen;
+
+ /* Handle the case where we don't need to perform any transforms */
+ if (len < 64 - r) {
+ memcpy(&ctx->buf[r], src, len);
+ return;
}
+
+ /* Finish the current block */
+ memcpy(&ctx->buf[r], src, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+ src += 64 - r;
+ len -= 64 - r;
+
+ /* Perform complete blocks */
+ while (len >= 64) {
+ SHA256_Transform(ctx->state, src);
+ src += 64;
+ len -= 64;
+ }
+
+ /* Copy left over data into buffer */
+ memcpy(ctx->buf, src, len);
}
/*
@@ -279,14 +302,14 @@ SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
* and clears the context state.
*/
void
-SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx)
+SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
{
/* Add padding */
SHA256_Pad(ctx);
/* Write the hash */
- be32enc_vect(digest, ctx->state, 32);
+ be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
/* Clear the context state */
memset((void *)ctx, 0, sizeof(*ctx));
diff --git a/tools/audit_vsha256.sh b/tools/audit_vsha256.sh
new file mode 100644
index 0000000..a81c8c0
--- /dev/null
+++ b/tools/audit_vsha256.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+#
+# Script to compare vsha256.c with FreeBSD's sha256c.c
+#
+# Run this on an up-to-date FreeBSD source tree
+
+sed '
+s/vbe32/be32/g
+' lib/libvarnish/vsha256.c |
+ diff -ub /usr/src/sys/crypto/sha2/sha256c.c -
+
More information about the varnish-commit
mailing list