iImplement chunker interface - dedup - deduplicating backup program Err bitreich.org 70 hgit clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/ URL:git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/ bitreich.org 70 1Log /scm/dedup/log.gph bitreich.org 70 1Files /scm/dedup/files.gph bitreich.org 70 1Refs /scm/dedup/refs.gph bitreich.org 70 1Tags /scm/dedup/tag bitreich.org 70 1README /scm/dedup/file/README.gph bitreich.org 70 1LICENSE /scm/dedup/file/LICENSE.gph bitreich.org 70 i--- Err bitreich.org 70 1commit e8056ddcaccd7972a5008983d9bc60a3a36498c5 /scm/dedup/commit/e8056ddcaccd7972a5008983d9bc60a3a36498c5.gph bitreich.org 70 1parent 009a69939e6ce193b6bf6ab287aa007a4b8ab11e /scm/dedup/commit/009a69939e6ce193b6bf6ab287aa007a4b8ab11e.gph bitreich.org 70 hAuthor: sin URL:mailto:sin@2f30.org bitreich.org 70 iDate: Thu, 21 Feb 2019 10:24:44 +0000 Err bitreich.org 70 i Err bitreich.org 70 iImplement chunker interface Err bitreich.org 70 i Err bitreich.org 70 iDiffstat: Err bitreich.org 70 i M Makefile | 4 ++-- Err bitreich.org 70 i A chunker.c | 106 ++++++++++++++++++++++++++++++ Err bitreich.org 70 i M dedup.c | 143 ++++--------------------------- Err bitreich.org 70 i M dedup.h | 20 ++++++++++++++++++++ Err bitreich.org 70 i A hash.c | 67 +++++++++++++++++++++++++++++++ Err bitreich.org 70 i Err bitreich.org 70 i5 files changed, 212 insertions(+), 128 deletions(-) Err bitreich.org 70 i--- Err bitreich.org 70 1diff --git a/Makefile b/Makefile /scm/dedup/file/Makefile.gph bitreich.org 70 i@@ -2,8 +2,8 @@ VERSION = 0.3 Err bitreich.org 70 i PREFIX = /usr/local Err bitreich.org 70 i MANPREFIX = $(PREFIX)/man Err bitreich.org 70 i BIN = dedup Err bitreich.org 70 i-SRC = $(BIN).c pack.c unpack.c Err bitreich.org 70 i-OBJ = $(BIN).o pack.o unpack.o Err bitreich.org 70 i+SRC = $(BIN).c chunker.c hash.c pack.c unpack.c Err bitreich.org 70 i+OBJ = $(BIN).o chunker.o hash.o pack.o unpack.o Err bitreich.org 70 i DISTFILES = $(SRC) LICENSE Makefile README arg.h $(BIN).1 dedup.h tree.h Err bitreich.org 70 i Err bitreich.org 70 i CFLAGS = -g -Wall Err bitreich.org 70 1diff --git a/chunker.c b/chunker.c /scm/dedup/file/chunker.c.gph bitreich.org 70 i@@ -0,0 +1,106 @@ Err bitreich.org 70 i+#include Err bitreich.org 70 i+#include Err bitreich.org 70 i+#include Err bitreich.org 70 i+#include Err bitreich.org 70 i+#include Err bitreich.org 70 i+#include Err bitreich.org 70 i+ Err bitreich.org 70 i+#include "dedup.h" Err bitreich.org 70 i+ Err bitreich.org 70 i+struct chunker { Err bitreich.org 70 i+ uint8_t *buf; Err bitreich.org 70 i+ size_t size; Err bitreich.org 70 i+ size_t pos; Err bitreich.org 70 i+ int fd; Err bitreich.org 70 i+}; Err bitreich.org 70 i+ Err bitreich.org 70 i+static size_t Err bitreich.org 70 i+get_chunk_size(struct chunker *chunker) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ uint8_t *bp; Err bitreich.org 70 i+ size_t i; Err bitreich.org 70 i+ uint32_t fp; Err bitreich.org 70 i+ Err bitreich.org 70 i+ /* buzhash should be at least WINSIZE */ Err bitreich.org 70 i+ if (chunker->pos < WINSIZE) Err bitreich.org 70 i+ return chunker->pos; Err bitreich.org 70 i+ Err bitreich.org 70 i+ bp = chunker->buf; Err bitreich.org 70 i+ Err bitreich.org 70 i+ /* Err bitreich.org 70 i+ * To achieve better deduplication, we chunk blocks based on a Err bitreich.org 70 i+ * recurring pattern occuring on the data stream. A fixed window Err bitreich.org 70 i+ * of WINSIZE bytes is slid over the data, and a rolling hash is Err bitreich.org 70 i+ * computed for this window. Err bitreich.org 70 i+ * When the rolling hash matches a given pattern (see HASHMSK), Err bitreich.org 70 i+ * the block is chunked at the end of that window, thus making Err bitreich.org 70 i+ * WINSIZE the smallest possible block size. Err bitreich.org 70 i+ */ Err bitreich.org 70 i+ fp = buzh_init(bp, WINSIZE); Err bitreich.org 70 i+ for (i = 0; i < chunker->pos - WINSIZE; i++) { Err bitreich.org 70 i+ if (i > 0) Err bitreich.org 70 i+ fp = buzh_update(fp, bp[i - 1], bp[WINSIZE + i - 1], Err bitreich.org 70 i+ WINSIZE); Err bitreich.org 70 i+ if ((fp & HASHMSK) == 0) Err bitreich.org 70 i+ return i + WINSIZE; Err bitreich.org 70 i+ } Err bitreich.org 70 i+ return chunker->pos; Err bitreich.org 70 i+} Err bitreich.org 70 i+ Err bitreich.org 70 i+struct chunker * Err bitreich.org 70 i+alloc_chunker(size_t size, int fd) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ struct chunker *chunker; Err bitreich.org 70 i+ Err bitreich.org 70 i+ chunker = malloc(sizeof(*chunker)); Err bitreich.org 70 i+ if (chunker == NULL) Err bitreich.org 70 i+ err(1, "malloc"); Err bitreich.org 70 i+ Err bitreich.org 70 i+ chunker->buf = malloc(size); Err bitreich.org 70 i+ if (chunker->buf == NULL) Err bitreich.org 70 i+ err(1, "malloc"); Err bitreich.org 70 i+ chunker->size = size; Err bitreich.org 70 i+ chunker->pos = 0; Err bitreich.org 70 i+ chunker->fd = fd; Err bitreich.org 70 i+ Err bitreich.org 70 i+ return chunker; Err bitreich.org 70 i+} Err bitreich.org 70 i+ Err bitreich.org 70 i+void Err bitreich.org 70 i+free_chunker(struct chunker *chunker) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ free(chunker->buf); Err bitreich.org 70 i+ free(chunker); Err bitreich.org 70 i+} Err bitreich.org 70 i+ Err bitreich.org 70 i+ssize_t Err bitreich.org 70 i+fill_chunker(struct chunker *chunker) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ uint8_t *bp; Err bitreich.org 70 i+ ssize_t n; Err bitreich.org 70 i+ Err bitreich.org 70 i+ bp = &chunker->buf[chunker->pos]; Err bitreich.org 70 i+ n = read(chunker->fd, bp, chunker->size - chunker->pos); Err bitreich.org 70 i+ if (n < 0) Err bitreich.org 70 i+ err(1, "read"); Err bitreich.org 70 i+ chunker->pos += n; Err bitreich.org 70 i+ return chunker->pos; Err bitreich.org 70 i+} Err bitreich.org 70 i+ Err bitreich.org 70 i+void Err bitreich.org 70 i+drain_chunker(struct chunker *chunker, size_t chunk_size) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ uint8_t *src, *dst; Err bitreich.org 70 i+ Err bitreich.org 70 i+ src = &chunker->buf[chunk_size]; Err bitreich.org 70 i+ dst = chunker->buf; Err bitreich.org 70 i+ memmove(dst, src, chunker->pos - chunk_size); Err bitreich.org 70 i+ chunker->pos -= chunk_size; Err bitreich.org 70 i+} Err bitreich.org 70 i+ Err bitreich.org 70 i+uint8_t * Err bitreich.org 70 i+get_chunk(struct chunker *chunker, size_t *size) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ *size = get_chunk_size(chunker); Err bitreich.org 70 i+ return chunker->buf; Err bitreich.org 70 i+} Err bitreich.org 70 1diff --git a/dedup.c b/dedup.c /scm/dedup/file/dedup.c.gph bitreich.org 70 i@@ -19,9 +19,6 @@ Err bitreich.org 70 i #define STOREF ".store" Err bitreich.org 70 i #define CACHEF ".cache" Err bitreich.org 70 i Err bitreich.org 70 i-#define BLKSIZE 4096 Err bitreich.org 70 i-#define WINSIZE 511 Err bitreich.org 70 i-#define HASHMSK ((1ul << 10) - 1) Err bitreich.org 70 i #define MSGSIZE 256 Err bitreich.org 70 i #define MDSIZE SHA256_DIGEST_LENGTH Err bitreich.org 70 i Err bitreich.org 70 i@@ -29,8 +26,6 @@ Err bitreich.org 70 i #define VER_MIN 1 Err bitreich.org 70 i #define VER_MAJ 0 Err bitreich.org 70 i Err bitreich.org 70 i-#define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y)))) Err bitreich.org 70 i- Err bitreich.org 70 i enum { Err bitreich.org 70 i WALK_CONTINUE, Err bitreich.org 70 i WALK_STOP Err bitreich.org 70 i@@ -89,99 +84,6 @@ unsigned long long cache_hits; Err bitreich.org 70 i unsigned long long cache_misses; Err bitreich.org 70 i char *argv0; Err bitreich.org 70 i Err bitreich.org 70 i-/* Err bitreich.org 70 i- * Static table for use in buzhash algorithm. Err bitreich.org 70 i- * 256 * 32 bits randomly generated unique integers Err bitreich.org 70 i- * Err bitreich.org 70 i- * To get better pseudo-random results, there is exactly the same number Err bitreich.org 70 i- * of 0 and 1 spread amongst these integers. It means that there is Err bitreich.org 70 i- * exactly 50% chance that a XOR operation would flip all the bits in Err bitreich.org 70 i- * the hash. Err bitreich.org 70 i- */ Err bitreich.org 70 i-uint32_t buz[] = { Err bitreich.org 70 i- 0xbc9fa594,0x30a8f827,0xced627a7,0xdb46a745,0xcfa4a9e8,0x77cccb59,0xddb66276,0x3adc532f, Err bitreich.org 70 i- 0xfe8b67d3,0x8155b59e,0x0c893666,0x1d757009,0x17394ee4,0x85d94c07,0xcacd52da,0x076c6f79, Err bitreich.org 70 i- 0xead0a798,0x6c7ccb4a,0x2639a1b8,0x3aa5ae32,0x3e6218d2,0xb290d980,0xa5149521,0x4b426119, Err bitreich.org 70 i- 0xd3230fc7,0x677c1cc4,0x2b64603c,0x01fe92a8,0xbe358296,0xa7e7fac7,0xf509bf41,0x04b017ad, Err bitreich.org 70 i- 0xf900344c,0x8e14e202,0xb2a6e9b4,0x3db3c311,0x960286a8,0xf6bf0468,0xed54ec94,0xf358070c, Err bitreich.org 70 i- 0x6a4795dd,0x3f7b925c,0x5e13a060,0xfaecbafe,0x03c8bb55,0x8a56ba88,0x633e3b49,0xe036bbbe, Err bitreich.org 70 i- 0x1ed3dbb5,0x76e8ad74,0x79d346ab,0x44b4ccc4,0x71eb22d3,0xa1aa3f24,0x50e05b81,0xa3b450d3, Err bitreich.org 70 i- 0x7f5caffb,0xa1990650,0x54c44800,0xda134b65,0x72362eea,0xbd12b8e6,0xf7c99fdc,0x020d48c7, Err bitreich.org 70 i- 0x9d9c3d46,0x32b75615,0xe61923cf,0xadc09d8f,0xab11376b,0xd66fe4cd,0xb3b086b6,0xb8345b9f, Err bitreich.org 70 i- 0x59029667,0xae0e937c,0xcbd4d4ba,0x720bb3fb,0x5f7d2ca3,0xec24ba15,0x6b40109b,0xf0a54587, Err bitreich.org 70 i- 0x3acf9420,0x466e981d,0xc66dc124,0x150ef7b4,0xc3ce718e,0x136774f5,0x46684ab4,0xb4b490f0, Err bitreich.org 70 i- 0x26508a8b,0xf12febc8,0x4b99171b,0xfc373c84,0x339b5677,0x41703ff3,0x7cadbbd7,0x15ea24e2, Err bitreich.org 70 i- 0x7a2f9783,0xed6a383a,0x649eb072,0x79970941,0x2abd28ad,0x4375e00c,0x9df084f7,0x6fdeec6c, Err bitreich.org 70 i- 0x6619ac6d,0x7d256f4d,0x9b8e658a,0x3d7627e9,0xd5a98d45,0x15f84223,0x9b6acef5,0xf876be67, Err bitreich.org 70 i- 0xe3ae7089,0x84e2b64a,0x6818a969,0x86e9ba4e,0xa24a5b57,0x61570cf1,0xa5f8fc91,0x879d8383, Err bitreich.org 70 i- 0x91b13866,0x75e87961,0x16db8138,0x5a2ff6b8,0x8f664e9b,0x894e1496,0x88235c5b,0xcdb3b580, Err bitreich.org 70 i- 0xa2e80109,0xb0f88a82,0xd12cd340,0x93fbc37d,0xf4d1eb82,0xce42f309,0x16ffd2c2,0xb4dfef2b, Err bitreich.org 70 i- 0xb8b1a33e,0x4708a5e6,0xba66dd88,0xa9ec0da6,0x6f8ee2c9,0xad8b9993,0x1d6a25a8,0x1f3d08ce, Err bitreich.org 70 i- 0x149c04e7,0x5cd1fa51,0xb84c89c7,0xeced6f8c,0xe328b30f,0x084fa836,0x6d1bb1b7,0x94c78ea5, Err bitreich.org 70 i- 0x14973034,0xf1a1bcef,0x48b798d2,0xded9ca9e,0x5fd965d0,0x92544eb1,0x5e80f189,0xcbbf5e15, Err bitreich.org 70 i- 0x4d8121f0,0x5dd3b92f,0xd9ea98fb,0x2dbf5644,0x0fbcb9b7,0x20a1db53,0x7c3fcc98,0x36744fbd, Err bitreich.org 70 i- 0xced08954,0x8e7c5efe,0x3c5f6733,0x657477be,0x3630a02d,0x38bcbda0,0xb7702575,0x4a7f4bce, Err bitreich.org 70 i- 0x0e7660fe,0x4dcb91b5,0x4fd7ffd3,0x041821c1,0xa846a181,0xc8048e9e,0xd4b05072,0x986e0509, Err bitreich.org 70 i- 0xa00aaeeb,0x02e3526a,0x2fac4843,0xfa98e805,0x923ecd8d,0x395d9546,0x8674c3cd,0xae5a8a71, Err bitreich.org 70 i- 0x966dfe45,0x5c9ceba5,0x0830a1cf,0xa1750981,0x8f604480,0x28ea0c9a,0x0da12413,0x98b0b3c5, Err bitreich.org 70 i- 0xa21d473a,0x96ce4308,0xe9a1001b,0x8bbacb44,0x18bad3f4,0xe3121acb,0x46a9b45f,0x92cd9704, Err bitreich.org 70 i- 0xc1a7c619,0x3281e361,0x462e8c79,0x9e572f93,0x7239e5f0,0x67d8e6ba,0x13747ce3,0xf01ee64a, Err bitreich.org 70 i- 0xe7d0ae12,0xeea04088,0xe5b36767,0x17558eae,0x678ffbe6,0xe0bbc866,0x0c24adec,0xa9cbb869, Err bitreich.org 70 i- 0x3fd44ee1,0x9ca4ca06,0x04c0ef00,0x04589a21,0x9cf9c819,0x976f6ca1,0x8a30e66a,0x004d6f7e, Err bitreich.org 70 i- 0x384c8851,0x5bc97eb8,0xc6c49339,0x5aa386c7,0x74bdf8af,0x9b713750,0x4112f8c2,0x2895dae1, Err bitreich.org 70 i- 0xf576d905,0x9de98bce,0xb2b26bcd,0xd46707a0,0x147fbb46,0xa52c6e50,0xe43128fc,0x374ad964, Err bitreich.org 70 i- 0x8dfd4d53,0xc4d0c087,0x31dfb5ca,0xa44589b5,0x6b637e2e,0x663f6b45,0xd2d8baa0,0x1dac7e4c Err bitreich.org 70 i-}; Err bitreich.org 70 i- Err bitreich.org 70 i-/* Buzhash: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial */ Err bitreich.org 70 i-uint32_t Err bitreich.org 70 i-buzh_init(uint8_t *buf, size_t size) Err bitreich.org 70 i-{ Err bitreich.org 70 i- size_t i; Err bitreich.org 70 i- uint32_t fp; Err bitreich.org 70 i- Err bitreich.org 70 i- for (i = size - 1, fp = 0; i > 0; i--, buf++) Err bitreich.org 70 i- fp ^= ROTL(buz[*buf], i % 32); Err bitreich.org 70 i- Err bitreich.org 70 i- return fp ^ buz[*buf]; Err bitreich.org 70 i-} Err bitreich.org 70 i- Err bitreich.org 70 i-uint32_t Err bitreich.org 70 i-buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t size) Err bitreich.org 70 i-{ Err bitreich.org 70 i- return ROTL(fp, 1) ^ ROTL(buz[out], size % 32) ^ buz[in]; Err bitreich.org 70 i-} Err bitreich.org 70 i- Err bitreich.org 70 i-uint64_t Err bitreich.org 70 i-chunk_blk(uint8_t *buf, size_t size) Err bitreich.org 70 i-{ Err bitreich.org 70 i- size_t i; Err bitreich.org 70 i- uint32_t fp; Err bitreich.org 70 i- Err bitreich.org 70 i- /* buzhash should be at least WINSIZE */ Err bitreich.org 70 i- if (size < WINSIZE) Err bitreich.org 70 i- return size; Err bitreich.org 70 i- Err bitreich.org 70 i- /* Err bitreich.org 70 i- * To achieve better deduplication, we chunk blocks based on a Err bitreich.org 70 i- * recurring pattern occuring on the data stream. A fixed window Err bitreich.org 70 i- * of WINSIZE bytes is slid over the data, and a rolling hash is Err bitreich.org 70 i- * computed for this window. Err bitreich.org 70 i- * When the rolling hash matches a given pattern (see HASHMSK), Err bitreich.org 70 i- * the block is chunked at the end of that window, thus making Err bitreich.org 70 i- * WINSIZE the smallest possible block size. Err bitreich.org 70 i- */ Err bitreich.org 70 i- fp = buzh_init(buf, WINSIZE); Err bitreich.org 70 i- for (i = 0; i < size - WINSIZE; i++) { Err bitreich.org 70 i- if (i > 0) Err bitreich.org 70 i- fp = buzh_update(fp, buf[i - 1], buf[WINSIZE + i - 1], Err bitreich.org 70 i- WINSIZE); Err bitreich.org 70 i- if ((fp & HASHMSK) == 0) Err bitreich.org 70 i- return i + WINSIZE; Err bitreich.org 70 i- } Err bitreich.org 70 i- return size; Err bitreich.org 70 i-} Err bitreich.org 70 i- Err bitreich.org 70 i size_t Err bitreich.org 70 i comp_size(size_t size) Err bitreich.org 70 i { Err bitreich.org 70 i@@ -460,43 +362,33 @@ lookup_blk_desc(uint8_t *md, struct blk_desc *blk_desc) Err bitreich.org 70 i void Err bitreich.org 70 i dedup(int fd, char *msg) Err bitreich.org 70 i { Err bitreich.org 70 i- uint8_t *buf[2]; Err bitreich.org 70 i struct snapshot *snap; Err bitreich.org 70 i+ struct chunker *chunker; Err bitreich.org 70 i+ uint8_t *comp_buf; Err bitreich.org 70 i SHA256_CTX ctx; Err bitreich.org 70 i- ssize_t n, bufsize; Err bitreich.org 70 i+ ssize_t n; Err bitreich.org 70 i Err bitreich.org 70 i- buf[0] = alloc_buf(BLKSIZE); Err bitreich.org 70 i- buf[1] = alloc_buf(comp_size(BLKSIZE)); Err bitreich.org 70 i snap = alloc_snap(); Err bitreich.org 70 i+ chunker = alloc_chunker(BLKSIZE, fd); Err bitreich.org 70 i+ comp_buf = alloc_buf(comp_size(BLKSIZE)); Err bitreich.org 70 i Err bitreich.org 70 i- bufsize = 0; Err bitreich.org 70 i SHA256_Init(&ctx); Err bitreich.org 70 i- while ((n = xread(fd, buf[0] + bufsize, BLKSIZE - bufsize)) > 0 || Err bitreich.org 70 i- bufsize > 0) { Err bitreich.org 70 i- Err bitreich.org 70 i+ while ((n = fill_chunker(chunker)) > 0) { Err bitreich.org 70 i uint8_t md[MDSIZE]; Err bitreich.org 70 i struct blk_desc blk_desc; Err bitreich.org 70 i- size_t blksize, csize; Err bitreich.org 70 i- uint8_t *inp = buf[0]; /* input buf */ Err bitreich.org 70 i- uint8_t *outp = buf[1]; /* compressed buf */ Err bitreich.org 70 i+ size_t chunk_size, csize; Err bitreich.org 70 i+ uint8_t *chunkp; Err bitreich.org 70 i Err bitreich.org 70 i- if (n > 0) { Err bitreich.org 70 i- bufsize += n; Err bitreich.org 70 i- snaphdr.st.orig_size += n; Err bitreich.org 70 i- } Err bitreich.org 70 i+ chunkp = get_chunk(chunker, &chunk_size); Err bitreich.org 70 i+ SHA256_Update(&ctx, chunkp, chunk_size); Err bitreich.org 70 i Err bitreich.org 70 i- blksize = chunk_blk(inp, bufsize); Err bitreich.org 70 i- csize = comp(inp, outp, blksize, comp_size(BLKSIZE)); Err bitreich.org 70 i+ csize = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE)); Err bitreich.org 70 i+ hash_blk(comp_buf, csize, md); Err bitreich.org 70 i Err bitreich.org 70 i+ snaphdr.st.orig_size += chunk_size; Err bitreich.org 70 i snaphdr.st.comp_size += csize; Err bitreich.org 70 i Err bitreich.org 70 i- hash_blk(outp, csize, md); Err bitreich.org 70 i- Err bitreich.org 70 i- /* Calculate file hash one block at a time */ Err bitreich.org 70 i- SHA256_Update(&ctx, inp, blksize); Err bitreich.org 70 i- Err bitreich.org 70 i snap = grow_snap(snap, snap->nr_blk_descs + 1); Err bitreich.org 70 i- Err bitreich.org 70 i if (lookup_blk_desc(md, &blk_desc) < 0) { Err bitreich.org 70 i struct cache_entry *ent; Err bitreich.org 70 i Err bitreich.org 70 i@@ -506,7 +398,7 @@ dedup(int fd, char *msg) Err bitreich.org 70 i Err bitreich.org 70 i snap->blk_desc[snap->nr_blk_descs++] = blk_desc; Err bitreich.org 70 i Err bitreich.org 70 i- append_blk(outp, &blk_desc); Err bitreich.org 70 i+ append_blk(comp_buf, &blk_desc); Err bitreich.org 70 i Err bitreich.org 70 i ent = alloc_cache_entry(); Err bitreich.org 70 i ent->blk_desc = blk_desc; Err bitreich.org 70 i@@ -526,8 +418,7 @@ dedup(int fd, char *msg) Err bitreich.org 70 i cache_hits++; Err bitreich.org 70 i } Err bitreich.org 70 i Err bitreich.org 70 i- memmove(inp, inp + blksize, bufsize - blksize); Err bitreich.org 70 i- bufsize -= blksize; Err bitreich.org 70 i+ drain_chunker(chunker, chunk_size); Err bitreich.org 70 i } Err bitreich.org 70 i Err bitreich.org 70 i if (snap->nr_blk_descs > 0) { Err bitreich.org 70 i@@ -546,9 +437,9 @@ dedup(int fd, char *msg) Err bitreich.org 70 i append_snap(snap); Err bitreich.org 70 i } Err bitreich.org 70 i Err bitreich.org 70 i+ free(comp_buf); Err bitreich.org 70 i+ free_chunker(chunker); Err bitreich.org 70 i free(snap); Err bitreich.org 70 i- free(buf[1]); Err bitreich.org 70 i- free(buf[0]); Err bitreich.org 70 i } Err bitreich.org 70 i Err bitreich.org 70 i int Err bitreich.org 70 1diff --git a/dedup.h b/dedup.h /scm/dedup/file/dedup.h.gph bitreich.org 70 i@@ -1,2 +1,22 @@ Err bitreich.org 70 i+#define BLKSIZE 4096 Err bitreich.org 70 i+#define WINSIZE 511 Err bitreich.org 70 i+#define HASHMSK ((1ul << 10) - 1) Err bitreich.org 70 i+ Err bitreich.org 70 i+struct chunker; Err bitreich.org 70 i+ Err bitreich.org 70 i+/* chunker.c */ Err bitreich.org 70 i+struct chunker *alloc_chunker(size_t size, int fd); Err bitreich.org 70 i+void free_chunker(struct chunker *chunker); Err bitreich.org 70 i+ssize_t fill_chunker(struct chunker *chunker); Err bitreich.org 70 i+void drain_chunker(struct chunker *chunker, size_t n); Err bitreich.org 70 i+uint8_t *get_chunk(struct chunker *chunker, size_t *size); Err bitreich.org 70 i+ Err bitreich.org 70 i+/* hash.c */ Err bitreich.org 70 i+uint32_t buzh_init(uint8_t *buf, size_t size); Err bitreich.org 70 i+uint32_t buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t size); Err bitreich.org 70 i+ Err bitreich.org 70 i+/* pack.c */ Err bitreich.org 70 i int pack(unsigned char *dst, char *fmt, ...); Err bitreich.org 70 i+ Err bitreich.org 70 i+/* unpack.c */ Err bitreich.org 70 i int unpack(unsigned char *src, char *fmt, ...); Err bitreich.org 70 1diff --git a/hash.c b/hash.c /scm/dedup/file/hash.c.gph bitreich.org 70 i@@ -0,0 +1,67 @@ Err bitreich.org 70 i+#include Err bitreich.org 70 i+#include Err bitreich.org 70 i+ Err bitreich.org 70 i+#define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y)))) Err bitreich.org 70 i+ Err bitreich.org 70 i+/* Err bitreich.org 70 i+ * Static table for use in buzhash algorithm. Err bitreich.org 70 i+ * 256 * 32 bits randomly generated unique integers Err bitreich.org 70 i+ * Err bitreich.org 70 i+ * To get better pseudo-random results, there is exactly the same number Err bitreich.org 70 i+ * of 0 and 1 spread amongst these integers. It means that there is Err bitreich.org 70 i+ * exactly 50% chance that a XOR operation would flip all the bits in Err bitreich.org 70 i+ * the hash. Err bitreich.org 70 i+ */ Err bitreich.org 70 i+static uint32_t buz[] = { Err bitreich.org 70 i+ 0xbc9fa594,0x30a8f827,0xced627a7,0xdb46a745,0xcfa4a9e8,0x77cccb59,0xddb66276,0x3adc532f, Err bitreich.org 70 i+ 0xfe8b67d3,0x8155b59e,0x0c893666,0x1d757009,0x17394ee4,0x85d94c07,0xcacd52da,0x076c6f79, Err bitreich.org 70 i+ 0xead0a798,0x6c7ccb4a,0x2639a1b8,0x3aa5ae32,0x3e6218d2,0xb290d980,0xa5149521,0x4b426119, Err bitreich.org 70 i+ 0xd3230fc7,0x677c1cc4,0x2b64603c,0x01fe92a8,0xbe358296,0xa7e7fac7,0xf509bf41,0x04b017ad, Err bitreich.org 70 i+ 0xf900344c,0x8e14e202,0xb2a6e9b4,0x3db3c311,0x960286a8,0xf6bf0468,0xed54ec94,0xf358070c, Err bitreich.org 70 i+ 0x6a4795dd,0x3f7b925c,0x5e13a060,0xfaecbafe,0x03c8bb55,0x8a56ba88,0x633e3b49,0xe036bbbe, Err bitreich.org 70 i+ 0x1ed3dbb5,0x76e8ad74,0x79d346ab,0x44b4ccc4,0x71eb22d3,0xa1aa3f24,0x50e05b81,0xa3b450d3, Err bitreich.org 70 i+ 0x7f5caffb,0xa1990650,0x54c44800,0xda134b65,0x72362eea,0xbd12b8e6,0xf7c99fdc,0x020d48c7, Err bitreich.org 70 i+ 0x9d9c3d46,0x32b75615,0xe61923cf,0xadc09d8f,0xab11376b,0xd66fe4cd,0xb3b086b6,0xb8345b9f, Err bitreich.org 70 i+ 0x59029667,0xae0e937c,0xcbd4d4ba,0x720bb3fb,0x5f7d2ca3,0xec24ba15,0x6b40109b,0xf0a54587, Err bitreich.org 70 i+ 0x3acf9420,0x466e981d,0xc66dc124,0x150ef7b4,0xc3ce718e,0x136774f5,0x46684ab4,0xb4b490f0, Err bitreich.org 70 i+ 0x26508a8b,0xf12febc8,0x4b99171b,0xfc373c84,0x339b5677,0x41703ff3,0x7cadbbd7,0x15ea24e2, Err bitreich.org 70 i+ 0x7a2f9783,0xed6a383a,0x649eb072,0x79970941,0x2abd28ad,0x4375e00c,0x9df084f7,0x6fdeec6c, Err bitreich.org 70 i+ 0x6619ac6d,0x7d256f4d,0x9b8e658a,0x3d7627e9,0xd5a98d45,0x15f84223,0x9b6acef5,0xf876be67, Err bitreich.org 70 i+ 0xe3ae7089,0x84e2b64a,0x6818a969,0x86e9ba4e,0xa24a5b57,0x61570cf1,0xa5f8fc91,0x879d8383, Err bitreich.org 70 i+ 0x91b13866,0x75e87961,0x16db8138,0x5a2ff6b8,0x8f664e9b,0x894e1496,0x88235c5b,0xcdb3b580, Err bitreich.org 70 i+ 0xa2e80109,0xb0f88a82,0xd12cd340,0x93fbc37d,0xf4d1eb82,0xce42f309,0x16ffd2c2,0xb4dfef2b, Err bitreich.org 70 i+ 0xb8b1a33e,0x4708a5e6,0xba66dd88,0xa9ec0da6,0x6f8ee2c9,0xad8b9993,0x1d6a25a8,0x1f3d08ce, Err bitreich.org 70 i+ 0x149c04e7,0x5cd1fa51,0xb84c89c7,0xeced6f8c,0xe328b30f,0x084fa836,0x6d1bb1b7,0x94c78ea5, Err bitreich.org 70 i+ 0x14973034,0xf1a1bcef,0x48b798d2,0xded9ca9e,0x5fd965d0,0x92544eb1,0x5e80f189,0xcbbf5e15, Err bitreich.org 70 i+ 0x4d8121f0,0x5dd3b92f,0xd9ea98fb,0x2dbf5644,0x0fbcb9b7,0x20a1db53,0x7c3fcc98,0x36744fbd, Err bitreich.org 70 i+ 0xced08954,0x8e7c5efe,0x3c5f6733,0x657477be,0x3630a02d,0x38bcbda0,0xb7702575,0x4a7f4bce, Err bitreich.org 70 i+ 0x0e7660fe,0x4dcb91b5,0x4fd7ffd3,0x041821c1,0xa846a181,0xc8048e9e,0xd4b05072,0x986e0509, Err bitreich.org 70 i+ 0xa00aaeeb,0x02e3526a,0x2fac4843,0xfa98e805,0x923ecd8d,0x395d9546,0x8674c3cd,0xae5a8a71, Err bitreich.org 70 i+ 0x966dfe45,0x5c9ceba5,0x0830a1cf,0xa1750981,0x8f604480,0x28ea0c9a,0x0da12413,0x98b0b3c5, Err bitreich.org 70 i+ 0xa21d473a,0x96ce4308,0xe9a1001b,0x8bbacb44,0x18bad3f4,0xe3121acb,0x46a9b45f,0x92cd9704, Err bitreich.org 70 i+ 0xc1a7c619,0x3281e361,0x462e8c79,0x9e572f93,0x7239e5f0,0x67d8e6ba,0x13747ce3,0xf01ee64a, Err bitreich.org 70 i+ 0xe7d0ae12,0xeea04088,0xe5b36767,0x17558eae,0x678ffbe6,0xe0bbc866,0x0c24adec,0xa9cbb869, Err bitreich.org 70 i+ 0x3fd44ee1,0x9ca4ca06,0x04c0ef00,0x04589a21,0x9cf9c819,0x976f6ca1,0x8a30e66a,0x004d6f7e, Err bitreich.org 70 i+ 0x384c8851,0x5bc97eb8,0xc6c49339,0x5aa386c7,0x74bdf8af,0x9b713750,0x4112f8c2,0x2895dae1, Err bitreich.org 70 i+ 0xf576d905,0x9de98bce,0xb2b26bcd,0xd46707a0,0x147fbb46,0xa52c6e50,0xe43128fc,0x374ad964, Err bitreich.org 70 i+ 0x8dfd4d53,0xc4d0c087,0x31dfb5ca,0xa44589b5,0x6b637e2e,0x663f6b45,0xd2d8baa0,0x1dac7e4c Err bitreich.org 70 i+}; Err bitreich.org 70 i+ Err bitreich.org 70 i+/* Buzhash: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial */ Err bitreich.org 70 i+uint32_t Err bitreich.org 70 i+buzh_init(uint8_t *buf, size_t size) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ size_t i; Err bitreich.org 70 i+ uint32_t fp; Err bitreich.org 70 i+ Err bitreich.org 70 i+ for (i = size - 1, fp = 0; i > 0; i--, buf++) Err bitreich.org 70 i+ fp ^= ROTL(buz[*buf], i % 32); Err bitreich.org 70 i+ Err bitreich.org 70 i+ return fp ^ buz[*buf]; Err bitreich.org 70 i+} Err bitreich.org 70 i+ Err bitreich.org 70 i+uint32_t Err bitreich.org 70 i+buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t size) Err bitreich.org 70 i+{ Err bitreich.org 70 i+ return ROTL(fp, 1) ^ ROTL(buz[out], size % 32) ^ buz[in]; Err bitreich.org 70 i+} Err bitreich.org 70 .