|
|
tdpack.c - dedup - data deduplication program |
|
|
 |
git clone git://bitreich.org/dedup/ git://hg6vgqziawt5s4dj.onion/dedup/ (git://bitreich.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
Tags |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
tdpack.c (4115B) |
|
|
|
--- |
|
|
|
1 #include <sys/types.h> |
|
|
|
2 #include <sys/stat.h> |
|
|
|
3 #include <sys/file.h> |
|
|
|
4 |
|
|
|
5 #include <err.h> |
|
|
|
6 #include <fcntl.h> |
|
|
|
7 #include <stdio.h> |
|
|
|
8 #include <stdint.h> |
|
|
|
9 #include <stdlib.h> |
|
|
|
10 #include <string.h> |
|
|
|
11 #include <unistd.h> |
|
|
|
12 |
|
|
|
13 #include "arg.h" |
|
|
|
14 #include "blake2.h" |
|
|
|
15 #include "dedup.h" |
|
|
|
16 |
|
|
|
17 static struct snap_hdr snap_hdr; |
|
|
|
18 static struct blk_hdr blk_hdr; |
|
|
|
19 static struct icache *icache; |
|
|
|
20 static int ifd; |
|
|
|
21 static int sfd; |
|
|
|
22 static int hash_algo = HASH_BLAKE2B; |
|
|
|
23 static int compr_algo = COMPR_LZ4; |
|
|
|
24 |
|
|
|
25 int verbose; |
|
|
|
26 char *argv0; |
|
|
|
27 |
|
|
|
28 static void |
|
|
|
29 dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size) |
|
|
|
30 { |
|
|
|
31 uint8_t md[MD_SIZE]; |
|
|
|
32 struct blk_desc blk_desc; |
|
|
|
33 struct compr_ctx ctx; |
|
|
|
34 uint8_t *compr_buf; |
|
|
|
35 size_t n, csize; |
|
|
|
36 |
|
|
|
37 if (compr_init(&ctx, compr_algo) < 0) |
|
|
|
38 errx(1, "compr_init failed"); |
|
|
|
39 csize = compr_size(&ctx, BLKSIZE_MAX); |
|
|
|
40 compr_buf = alloc_buf(csize); |
|
|
|
41 |
|
|
|
42 n = compr(&ctx, chunkp, compr_buf, chunk_size, csize); |
|
|
|
43 hash_blk(compr_buf, n, md, hash_algo); |
|
|
|
44 |
|
|
|
45 snap_hdr.st.orig_size += chunk_size; |
|
|
|
46 snap_hdr.st.compr_size += n; |
|
|
|
47 |
|
|
|
48 memcpy(blk_desc.md, md, sizeof(blk_desc.md)); |
|
|
|
49 if (lookup_icache(icache, &blk_desc) < 0) { |
|
|
|
50 blk_desc.offset = blk_hdr.size; |
|
|
|
51 blk_desc.size = n; |
|
|
|
52 |
|
|
|
53 snap->blk_desc[snap->nr_blk_descs++] = blk_desc; |
|
|
|
54 append_blk(sfd, &blk_hdr, compr_buf, &blk_desc); |
|
|
|
55 |
|
|
|
56 insert_icache(icache, &blk_desc); |
|
|
|
57 |
|
|
|
58 snap_hdr.st.dedup_size += blk_desc.size; |
|
|
|
59 snap_hdr.st.nr_blks++; |
|
|
|
60 |
|
|
|
61 if (blk_desc.size > snap_hdr.st.max_blk_size) |
|
|
|
62 snap_hdr.st.max_blk_size = blk_desc.size; |
|
|
|
63 if (blk_desc.size < snap_hdr.st.min_blk_size) |
|
|
|
64 snap_hdr.st.min_blk_size = blk_desc.size; |
|
|
|
65 } else { |
|
|
|
66 snap->blk_desc[snap->nr_blk_descs++] = blk_desc; |
|
|
|
67 } |
|
|
|
68 |
|
|
|
69 free(compr_buf); |
|
|
|
70 compr_final(&ctx); |
|
|
|
71 } |
|
|
|
72 |
|
|
|
73 static void |
|
|
|
74 dedup(int fd, char *msg) |
|
|
|
75 { |
|
|
|
76 struct snap *snap; |
|
|
|
77 struct chunker *chunker; |
|
|
|
78 |
|
|
|
79 snap = alloc_snap(); |
|
|
|
80 chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX, |
|
|
|
81 HASHMASK_BITS, WINSIZE); |
|
|
|
82 |
|
|
|
83 while (fill_chunker(chunker) > 0) { |
|
|
|
84 uint8_t *chunkp; |
|
|
|
85 size_t chunk_size; |
|
|
|
86 |
|
|
|
87 chunkp = get_chunk(chunker, &chunk_size); |
|
|
|
88 snap = grow_snap(snap, snap->nr_blk_descs + 1); |
|
|
|
89 dedup_chunk(snap, chunkp, chunk_size); |
|
|
|
90 drain_chunker(chunker); |
|
|
|
91 } |
|
|
|
92 |
|
|
|
93 if (snap->nr_blk_descs > 0) { |
|
|
|
94 if (msg != NULL) { |
|
|
|
95 size_t size; |
|
|
|
96 |
|
|
|
97 size = strlen(msg) + 1; |
|
|
|
98 if (size > sizeof(snap->msg)) |
|
|
|
99 size = sizeof(snap->msg); |
|
|
|
100 memcpy(snap->msg, msg, size); |
|
|
|
101 snap->msg[size - 1] = '\0'; |
|
|
|
102 } |
|
|
|
103 hash_snap(snap, snap->md, hash_algo); |
|
|
|
104 append_snap(ifd, &snap_hdr, snap); |
|
|
|
105 } |
|
|
|
106 |
|
|
|
107 free_chunker(chunker); |
|
|
|
108 free_snap(snap); |
|
|
|
109 } |
|
|
|
110 |
|
|
|
111 static int |
|
|
|
112 build_icache(struct snap *snap, void *arg) |
|
|
|
113 { |
|
|
|
114 struct compr_ctx ctx; |
|
|
|
115 uint8_t *buf; |
|
|
|
116 uint64_t i; |
|
|
|
117 |
|
|
|
118 if (compr_init(&ctx, compr_algo) < 0) |
|
|
|
119 errx(1, "compr_init failed"); |
|
|
|
120 buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX)); |
|
|
|
121 for (i = 0; i < snap->nr_blk_descs; i++) { |
|
|
|
122 struct blk_desc *blk_desc; |
|
|
|
123 |
|
|
|
124 blk_desc = &snap->blk_desc[i]; |
|
|
|
125 insert_icache(icache, blk_desc); |
|
|
|
126 } |
|
|
|
127 free(buf); |
|
|
|
128 compr_final(&ctx); |
|
|
|
129 return WALK_CONTINUE; |
|
|
|
130 } |
|
|
|
131 |
|
|
|
132 static void |
|
|
|
133 init(void) |
|
|
|
134 { |
|
|
|
135 ifd = open(SNAPSF, O_RDWR, 0600); |
|
|
|
136 if (ifd < 0) |
|
|
|
137 err(1, "open %s", SNAPSF); |
|
|
|
138 |
|
|
|
139 sfd = open(STOREF, O_RDWR, 0600); |
|
|
|
140 if (sfd < 0) |
|
|
|
141 err(1, "open %s", STOREF); |
|
|
|
142 |
|
|
|
143 if (flock(ifd, LOCK_NB | LOCK_EX) < 0 || |
|
|
|
144 flock(sfd, LOCK_NB | LOCK_EX) < 0) |
|
|
|
145 err(1, "flock"); |
|
|
|
146 |
|
|
|
147 |
|
|
|
148 xlseek(ifd, 0, SEEK_SET); |
|
|
|
149 load_snap_hdr(ifd, &snap_hdr); |
|
|
|
150 xlseek(sfd, 0, SEEK_SET); |
|
|
|
151 load_blk_hdr(sfd, &blk_hdr, &compr_algo, &hash_algo); |
|
|
|
152 |
|
|
|
153 icache = alloc_icache(); |
|
|
|
154 walk_snap(ifd, &snap_hdr, build_icache, NULL); |
|
|
|
155 } |
|
|
|
156 |
|
|
|
157 static void |
|
|
|
158 term(void) |
|
|
|
159 { |
|
|
|
160 xlseek(ifd, 0, SEEK_SET); |
|
|
|
161 write_snap_hdr(ifd, &snap_hdr); |
|
|
|
162 xlseek(sfd, 0, SEEK_SET); |
|
|
|
163 write_blk_hdr(sfd, &blk_hdr); |
|
|
|
164 |
|
|
|
165 fsync(ifd); |
|
|
|
166 fsync(sfd); |
|
|
|
167 |
|
|
|
168 close(ifd); |
|
|
|
169 close(sfd); |
|
|
|
170 |
|
|
|
171 free_icache(icache); |
|
|
|
172 } |
|
|
|
173 |
|
|
|
174 static void |
|
|
|
175 usage(void) |
|
|
|
176 { |
|
|
|
177 fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0); |
|
|
|
178 exit(1); |
|
|
|
179 } |
|
|
|
180 |
|
|
|
181 int |
|
|
|
182 main(int argc, char *argv[]) |
|
|
|
183 { |
|
|
|
184 char *repo, *msg = NULL; |
|
|
|
185 |
|
|
|
186 ARGBEGIN { |
|
|
|
187 case 'm': |
|
|
|
188 msg = EARGF(usage()); |
|
|
|
189 break; |
|
|
|
190 case 'v': |
|
|
|
191 verbose++; |
|
|
|
192 break; |
|
|
|
193 default: |
|
|
|
194 usage(); |
|
|
|
195 } ARGEND |
|
|
|
196 |
|
|
|
197 switch (argc) { |
|
|
|
198 case 0: |
|
|
|
199 repo = "."; |
|
|
|
200 break; |
|
|
|
201 case 1: |
|
|
|
202 repo = argv[0]; |
|
|
|
203 break; |
|
|
|
204 default: |
|
|
|
205 usage(); |
|
|
|
206 }; |
|
|
|
207 |
|
|
|
208 if (chdir(repo) < 0) |
|
|
|
209 err(1, "chdir: %s", repo); |
|
|
|
210 |
|
|
|
211 init(); |
|
|
|
212 dedup(STDIN_FILENO, msg); |
|
|
|
213 term(); |
|
|
|
214 return 0; |
|
|
|
215 } |
|