OpenCore  1.0.4
OpenCore Bootloader
Loading...
Searching...
No Matches
lzvn.c
Go to the documentation of this file.
1/*
2Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
61. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
82. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9 in the documentation and/or other materials provided with the distribution.
10
113. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12 from this software without specific prior written permission.
13
14THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20*/
21
22// LZVN low-level decoder
23
24#include "lzvn.h"
25
26#ifndef assert
27# define assert(x) do { } while (0)
28#endif
29
30#if defined(_MSC_VER) && !defined(__clang__)
31# define LZFSE_INLINE __forceinline
32# define __builtin_expect(X, Y) (X)
33# define __attribute__(X)
34# pragma warning(disable : 4068) // warning C4068: unknown pragma
35#else
36# define LZFSE_INLINE static inline __attribute__((__always_inline__))
37#endif
38
40#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__)
41typedef int64_t lzvn_offset;
42#else
44#endif
45
47typedef struct {
48
49 // Decoder I/O
50
51 // Next byte to read in source buffer
52 const unsigned char *src;
53 // Next byte after source buffer
54 const unsigned char *src_end;
55
56 // Next byte to write in destination buffer (by decoder)
57 unsigned char *dst;
58 // Valid range for destination buffer is [dst_begin, dst_end - 1]
59 unsigned char *dst_begin;
60 unsigned char *dst_end;
61 // Next byte to read in destination buffer (modified by caller)
62 unsigned char *dst_current;
63
64 // Decoder state
65
66 // Partially expanded match, or 0,0,0.
67 // In that case, src points to the next literal to copy, or the next op-code
68 // if L==0.
69 size_t L, M, D;
70
71 // Distance for last emitted match, or 0
73
74 // Did we decode end-of-stream?
76
78
80LZFSE_INLINE uint16_t load2(const void *ptr) {
81 uint16_t data;
82 memcpy(&data, ptr, sizeof data);
83 return data;
84}
85
86LZFSE_INLINE uint32_t load4(const void *ptr) {
87 uint32_t data;
88 memcpy(&data, ptr, sizeof data);
89 return data;
90}
91
92LZFSE_INLINE uint64_t load8(const void *ptr) {
93 uint64_t data;
94 memcpy(&data, ptr, sizeof data);
95 return data;
96}
97
99LZFSE_INLINE void store4(void *ptr, uint32_t data) {
100 memcpy(ptr, &data, sizeof data);
101}
102
103LZFSE_INLINE void store8(void *ptr, uint64_t data) {
104 memcpy(ptr, &data, sizeof data);
105}
106
109LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb,
110 unsigned width) {
111 static const size_t container_width = sizeof container * 8;
112 assert(lsb < container_width);
113 assert(width > 0 && width <= container_width);
114 assert(lsb + width <= container_width);
115 if (width == container_width)
116 return container;
117 return (container >> lsb) & (((uintmax_t)1 << width) - 1);
118}
119
120#if !defined(HAVE_LABELS_AS_VALUES)
121# if defined(__GNUC__) || defined(__clang__)
122# define HAVE_LABELS_AS_VALUES 1
123# else
124# define HAVE_LABELS_AS_VALUES 0
125# endif
126#endif
127
128// Both the source and destination buffers are represented by a pointer and
129// a length; they are *always* updated in concert using this macro; however
130// many bytes the pointer is advanced, the length is decremented by the same
131// amount. Thus, pointer + length always points to the byte one past the end
132// of the buffer.
133#define PTR_LEN_INC(_pointer, _length, _increment) \
134 (_pointer += _increment, _length -= _increment)
135
136// Update state with current positions and distance, corresponding to the
137// beginning of an instruction in both streams
138#define UPDATE_GOOD \
139 (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
140
144#if HAVE_LABELS_AS_VALUES
145 // Jump table for all instructions
146 static const void *opc_tbl[256] = {
147 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d,
148 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
149 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
150 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
151 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
152 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
153 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
154 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
155 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
156 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
157 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
158 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
159 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
160 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
161 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
162 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
163 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
164 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
165 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
166 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
167 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
168 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
169 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
170 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
171 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
172 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
173 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
174 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
175 &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
176 &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
177 &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
178 &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
179#endif
180 size_t src_len = state->src_end - state->src;
181 size_t dst_len = state->dst_end - state->dst;
182 if (src_len == 0 || dst_len == 0)
183 return; // empty buffer
184
185 const unsigned char *src_ptr = state->src;
186 unsigned char *dst_ptr = state->dst;
187 size_t D = state->d_prev;
188 size_t M;
189 size_t L;
190 size_t opc_len;
191
192 // Do we have a partially expanded match saved in state?
193 if (state->L != 0 || state->M != 0) {
194 L = state->L;
195 M = state->M;
196 D = state->D;
197 opc_len = 0; // we already skipped the op
198 state->L = state->M = state->D = 0;
199 if (M == 0)
200 goto copy_literal;
201 if (L == 0)
202 goto copy_match;
203 goto copy_literal_and_match;
204 }
205
206 unsigned char opc = src_ptr[0];
207
208#if HAVE_LABELS_AS_VALUES
209 goto *opc_tbl[opc];
210#else
211 for (;;) {
212 switch (opc) {
213#endif
214// ===============================================================
215// These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
216// literal and a match. The bulk of their implementations are shared;
217// each label here only does the work of setting the opcode length (not
218// including any literal bytes), and extracting the literal length, match
219// length, and match distance (except in pre_d). They then jump into the
220// shared implementation to actually output the literal and match bytes.
221//
222// No error checking happens in the first stage, except for ensuring that
223// the source has enough length to represent the full opcode before
224// reading past the first byte.
225#if HAVE_LABELS_AS_VALUES
226sml_d:
227#else
228 case 0:
229 case 1:
230 case 2:
231 case 3:
232 case 4:
233 case 5:
234 case 8:
235 case 9:
236 case 10:
237 case 11:
238 case 12:
239 case 13:
240 case 16:
241 case 17:
242 case 18:
243 case 19:
244 case 20:
245 case 21:
246 case 24:
247 case 25:
248 case 26:
249 case 27:
250 case 28:
251 case 29:
252 case 32:
253 case 33:
254 case 34:
255 case 35:
256 case 36:
257 case 37:
258 case 40:
259 case 41:
260 case 42:
261 case 43:
262 case 44:
263 case 45:
264 case 48:
265 case 49:
266 case 50:
267 case 51:
268 case 52:
269 case 53:
270 case 56:
271 case 57:
272 case 58:
273 case 59:
274 case 60:
275 case 61:
276 case 64:
277 case 65:
278 case 66:
279 case 67:
280 case 68:
281 case 69:
282 case 72:
283 case 73:
284 case 74:
285 case 75:
286 case 76:
287 case 77:
288 case 80:
289 case 81:
290 case 82:
291 case 83:
292 case 84:
293 case 85:
294 case 88:
295 case 89:
296 case 90:
297 case 91:
298 case 92:
299 case 93:
300 case 96:
301 case 97:
302 case 98:
303 case 99:
304 case 100:
305 case 101:
306 case 104:
307 case 105:
308 case 106:
309 case 107:
310 case 108:
311 case 109:
312 case 128:
313 case 129:
314 case 130:
315 case 131:
316 case 132:
317 case 133:
318 case 136:
319 case 137:
320 case 138:
321 case 139:
322 case 140:
323 case 141:
324 case 144:
325 case 145:
326 case 146:
327 case 147:
328 case 148:
329 case 149:
330 case 152:
331 case 153:
332 case 154:
333 case 155:
334 case 156:
335 case 157:
336 case 192:
337 case 193:
338 case 194:
339 case 195:
340 case 196:
341 case 197:
342 case 200:
343 case 201:
344 case 202:
345 case 203:
346 case 204:
347 case 205:
348#endif
350 // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
351 // where the length of literal (0-3 bytes) is encoded by the high 2 bits of
352 // the first byte. We first extract the literal length so we know how long
353 // the opcode is, then check that the source can hold both this opcode and
354 // at least one byte of the next (because any valid input stream must be
355 // terminated with an eos token).
356 opc_len = 2;
357 L = (size_t)extract(opc, 6, 2);
358 M = (size_t)extract(opc, 3, 3) + 3;
359 // We need to ensure that the source buffer is long enough that we can
360 // safely read this entire opcode, the literal that follows, and the first
361 // byte of the next opcode. Once we satisfy this requirement, we can
362 // safely unpack the match distance. A check similar to this one is
363 // present in all the opcode implementations.
364 if (src_len <= opc_len + L)
365 return; // source truncated
366 D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1];
367 goto copy_literal_and_match;
368
369#if HAVE_LABELS_AS_VALUES
370med_d:
371#else
372 case 160:
373 case 161:
374 case 162:
375 case 163:
376 case 164:
377 case 165:
378 case 166:
379 case 167:
380 case 168:
381 case 169:
382 case 170:
383 case 171:
384 case 172:
385 case 173:
386 case 174:
387 case 175:
388 case 176:
389 case 177:
390 case 178:
391 case 179:
392 case 180:
393 case 181:
394 case 182:
395 case 183:
396 case 184:
397 case 185:
398 case 186:
399 case 187:
400 case 188:
401 case 189:
402 case 190:
403 case 191:
404#endif
406 // "medium distance": This is a minor variant of the "small distance"
407 // encoding, where we will now use two extra bytes instead of one to encode
408 // the restof the match length and distance. This allows an extra two bits
409 // for the match length, and an extra three bits for the match distance. The
410 // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
411 opc_len = 3;
412 L = (size_t)extract(opc, 3, 2);
413 if (src_len <= opc_len + L)
414 return; // source truncated
415 uint16_t opc23 = load2(&src_ptr[1]);
416 M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3);
417 D = (size_t)extract(opc23, 2, 14);
418 goto copy_literal_and_match;
419
420#if HAVE_LABELS_AS_VALUES
421lrg_d:
422#else
423 case 7:
424 case 15:
425 case 23:
426 case 31:
427 case 39:
428 case 47:
429 case 55:
430 case 63:
431 case 71:
432 case 79:
433 case 87:
434 case 95:
435 case 103:
436 case 111:
437 case 135:
438 case 143:
439 case 151:
440 case 159:
441 case 199:
442 case 207:
443#endif
445 // "large distance": This is another variant of the "small distance"
446 // encoding, where we will now use two extra bytes to encode the match
447 // distance, which allows distances up to 65535 to be represented. The full
448 // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
449 opc_len = 3;
450 L = (size_t)extract(opc, 6, 2);
451 M = (size_t)extract(opc, 3, 3) + 3;
452 if (src_len <= opc_len + L)
453 return; // source truncated
454 D = load2(&src_ptr[1]);
455 goto copy_literal_and_match;
456
457#if HAVE_LABELS_AS_VALUES
458pre_d:
459#else
460 case 70:
461 case 78:
462 case 86:
463 case 94:
464 case 102:
465 case 110:
466 case 134:
467 case 142:
468 case 150:
469 case 158:
470 case 198:
471 case 206:
472#endif
474 // "previous distance": This opcode has the structure LLMMM110, where the
475 // length of the literal (0-3 bytes) is encoded by the high 2 bits of the
476 // first byte. We first extract the literal length so we know how long
477 // the opcode is, then check that the source can hold both this opcode and
478 // at least one byte of the next (because any valid input stream must be
479 // terminated with an eos token).
480 opc_len = 1;
481 L = (size_t)extract(opc, 6, 2);
482 M = (size_t)extract(opc, 3, 3) + 3;
483 if (src_len <= opc_len + L)
484 return; // source truncated
485 goto copy_literal_and_match;
486
487copy_literal_and_match:
488 // Common implementation of writing data for opcodes that have both a
489 // literal and a match. We begin by advancing the source pointer past
490 // the opcode, so that it points at the first literal byte (if L
491 // is non-zero; otherwise it points at the next opcode).
492 PTR_LEN_INC(src_ptr, src_len, opc_len);
493 // Now we copy the literal from the source pointer to the destination.
494 if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) {
495 // The literal is 0-3 bytes; if we are not near the end of the buffer,
496 // we can safely just do a 4 byte copy (which is guaranteed to cover
497 // the complete literal, and may include some other bytes as well).
498 store4(dst_ptr, load4(src_ptr));
499 } else if (L <= dst_len) {
500 // We are too close to the end of either the input or output stream
501 // to be able to safely use a four-byte copy, but we will not exhaust
502 // either stream (we already know that the source will not be
503 // exhausted from checks in the individual opcode implementations,
504 // and we just tested that dst_len > L). Thus, we need to do a
505 // byte-by-byte copy of the literal. This is slow, but it can only ever
506 // happen near the very end of a buffer, so it is not an important case to
507 // optimize.
508 for (size_t i = 0; i < L; ++i)
509 dst_ptr[i] = src_ptr[i];
510 } else {
511 // Destination truncated: fill DST, and store partial match
512
513 // Copy partial literal
514 for (size_t i = 0; i < dst_len; ++i)
515 dst_ptr[i] = src_ptr[i];
516 // Save state
517 state->src = src_ptr + dst_len;
518 state->dst = dst_ptr + dst_len;
519 state->L = L - dst_len;
520 state->M = M;
521 state->D = D;
522 return; // destination truncated
523 }
524 // Having completed the copy of the literal, we advance both the source
525 // and destination pointers by the number of literal bytes.
526 PTR_LEN_INC(dst_ptr, dst_len, L);
527 PTR_LEN_INC(src_ptr, src_len, L);
528 // Check if the match distance is valid; matches must not reference
529 // bytes that preceed the start of the output buffer, nor can the match
530 // distance be zero.
531 if (D > (size_t)(dst_ptr - state->dst_begin) || D == 0)
532 goto invalid_match_distance;
533copy_match:
534 // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
535 // in mind that we may have D < M, in which case the source and destination
536 // windows overlap in the copy. The semantics of the match copy are *not*
537 // those of memmove( ); if the buffers overlap it needs to behave as though
538 // we were copying byte-by-byte in increasing address order. If, for example,
539 // D is 1, the copy operation is equivalent to:
540 //
541 // memset(dst_ptr, dst_ptr[-1], M);
542 //
543 // i.e. it splats the previous byte. This means that we need to be very
544 // careful about using wide loads or stores to perform the copy operation.
545 if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) {
546 // We are not near the end of the buffer, and the match distance
547 // is at least eight. Thus, we can safely loop using eight byte
548 // copies. The last of these may slop over the intended end of
549 // the match, but this is OK because we know we have a safety bound
550 // away from the end of the destination buffer.
551 for (size_t i = 0; i < M; i += 8)
552 store8(&dst_ptr[i], load8(dst_ptr + i - D));
553 } else if (M <= dst_len) {
554 // Either the match distance is too small, or we are too close to
555 // the end of the buffer to safely use eight byte copies. Fall back
556 // on a simple byte-by-byte implementation.
557 for (size_t i = 0; i < M; ++i)
558 dst_ptr[i] = *(dst_ptr + i - D);
559 } else {
560 // Destination truncated: fill DST, and store partial match
561
562 // Copy partial match
563 for (size_t i = 0; i < dst_len; ++i)
564 dst_ptr[i] = *(dst_ptr + i - D);
565 // Save state
566 state->src = src_ptr;
567 state->dst = dst_ptr + dst_len;
568 state->L = 0;
569 state->M = M - dst_len;
570 state->D = D;
571 return; // destination truncated
572 }
573 // Update the destination pointer and length to account for the bytes
574 // written by the match, then load the next opcode byte and branch to
575 // the appropriate implementation.
576 PTR_LEN_INC(dst_ptr, dst_len, M);
577 opc = src_ptr[0];
578#if HAVE_LABELS_AS_VALUES
579 goto *opc_tbl[opc];
580#else
581 break;
582#endif
583
584// ===============================================================
585// Opcodes representing only a match (no literal).
586// These two opcodes (lrg_m and sml_m) encode only a match. The match
587// distance is carried over from the previous opcode, so all they need
588// to encode is the match length. We are able to reuse the match copy
589// sequence from the literal and match opcodes to perform the actual
590// copy implementation.
591#if HAVE_LABELS_AS_VALUES
592sml_m:
593#else
594 case 241:
595 case 242:
596 case 243:
597 case 244:
598 case 245:
599 case 246:
600 case 247:
601 case 248:
602 case 249:
603 case 250:
604 case 251:
605 case 252:
606 case 253:
607 case 254:
608 case 255:
609#endif
611 // "small match": This opcode has no literal, and uses the previous match
612 // distance (i.e. it encodes only the match length), in a single byte as
613 // 1111MMMM.
614 opc_len = 1;
615 if (src_len <= opc_len)
616 return; // source truncated
617 M = (size_t)extract(opc, 0, 4);
618 PTR_LEN_INC(src_ptr, src_len, opc_len);
619 goto copy_match;
620
621#if HAVE_LABELS_AS_VALUES
622lrg_m:
623#else
624 case 240:
625#endif
627 // "large match": This opcode has no literal, and uses the previous match
628 // distance (i.e. it encodes only the match length). It is encoded in two
629 // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
630 // be represented by sml_m, there is an implicit bias of 16 on the match
631 // length; the representable values are [16,271].
632 opc_len = 2;
633 if (src_len <= opc_len)
634 return; // source truncated
635 M = src_ptr[1] + 16;
636 PTR_LEN_INC(src_ptr, src_len, opc_len);
637 goto copy_match;
638
639// ===============================================================
640// Opcodes representing only a literal (no match).
641// These two opcodes (lrg_l and sml_l) encode only a literal. There is no
642// match length or match distance to worry about (but we need to *not*
643// touch D, as it must be preserved between opcodes).
644#if HAVE_LABELS_AS_VALUES
645sml_l:
646#else
647 case 225:
648 case 226:
649 case 227:
650 case 228:
651 case 229:
652 case 230:
653 case 231:
654 case 232:
655 case 233:
656 case 234:
657 case 235:
658 case 236:
659 case 237:
660 case 238:
661 case 239:
662#endif
664 // "small literal": This opcode has no match, and encodes only a literal
665 // of length up to 15 bytes. The format is 1110LLLL LITERAL.
666 opc_len = 1;
667 L = (size_t)extract(opc, 0, 4);
668 goto copy_literal;
669
670#if HAVE_LABELS_AS_VALUES
671lrg_l:
672#else
673 case 224:
674#endif
676 // "large literal": This opcode has no match, and uses the previous match
677 // distance (i.e. it encodes only the match length). It is encoded in two
678 // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
679 // bytes can be represented by sml_l, there is an implicit bias of 16 on
680 // the literal length; the representable values are [16,271].
681 opc_len = 2;
682 if (src_len <= 2)
683 return; // source truncated
684 L = src_ptr[1] + 16;
685 goto copy_literal;
686
687copy_literal:
688 // Check that the source buffer is large enough to hold the complete
689 // literal and at least the first byte of the next opcode. If so, advance
690 // the source pointer to point to the first byte of the literal and adjust
691 // the source length accordingly.
692 if (src_len <= opc_len + L)
693 return; // source truncated
694 PTR_LEN_INC(src_ptr, src_len, opc_len);
695 // Now we copy the literal from the source pointer to the destination.
696 if (dst_len >= L + 7 && src_len >= L + 7) {
697 // We are not near the end of the source or destination buffers; thus
698 // we can safely copy the literal using wide copies, without worrying
699 // about reading or writing past the end of either buffer.
700 for (size_t i = 0; i < L; i += 8)
701 store8(&dst_ptr[i], load8(&src_ptr[i]));
702 } else if (L <= dst_len) {
703 // We are too close to the end of either the input or output stream
704 // to be able to safely use an eight-byte copy. Instead we copy the
705 // literal byte-by-byte.
706 for (size_t i = 0; i < L; ++i)
707 dst_ptr[i] = src_ptr[i];
708 } else {
709 // Destination truncated: fill DST, and store partial match
710
711 // Copy partial literal
712 for (size_t i = 0; i < dst_len; ++i)
713 dst_ptr[i] = src_ptr[i];
714 // Save state
715 state->src = src_ptr + dst_len;
716 state->dst = dst_ptr + dst_len;
717 state->L = L - dst_len;
718 state->M = 0;
719 state->D = D;
720 return; // destination truncated
721 }
722 // Having completed the copy of the literal, we advance both the source
723 // and destination pointers by the number of literal bytes.
724 PTR_LEN_INC(dst_ptr, dst_len, L);
725 PTR_LEN_INC(src_ptr, src_len, L);
726 // Load the first byte of the next opcode, and jump to its implementation.
727 opc = src_ptr[0];
728#if HAVE_LABELS_AS_VALUES
729 goto *opc_tbl[opc];
730#else
731 break;
732#endif
733
734// ===============================================================
735// Other opcodes
736#if HAVE_LABELS_AS_VALUES
737nop:
738#else
739 case 14:
740 case 22:
741#endif
743 opc_len = 1;
744 if (src_len <= opc_len)
745 return; // source truncated
746 PTR_LEN_INC(src_ptr, src_len, opc_len);
747 opc = src_ptr[0];
748#if HAVE_LABELS_AS_VALUES
749 goto *opc_tbl[opc];
750#else
751 break;
752#endif
753
754#if HAVE_LABELS_AS_VALUES
755eos:
756#else
757 case 6:
758#endif
759 opc_len = 8;
760 if (src_len < opc_len)
761 return; // source truncated (here we don't need an extra byte for next op
762 // code)
763 PTR_LEN_INC(src_ptr, src_len, opc_len);
764 state->end_of_stream = 1;
766 return; // end-of-stream
767
768// ===============================================================
769// Return on error
770#if HAVE_LABELS_AS_VALUES
771udef:
772#else
773 case 30:
774 case 38:
775 case 46:
776 case 54:
777 case 62:
778 case 112:
779 case 113:
780 case 114:
781 case 115:
782 case 116:
783 case 117:
784 case 118:
785 case 119:
786 case 120:
787 case 121:
788 case 122:
789 case 123:
790 case 124:
791 case 125:
792 case 126:
793 case 127:
794 case 208:
795 case 209:
796 case 210:
797 case 211:
798 case 212:
799 case 213:
800 case 214:
801 case 215:
802 case 216:
803 case 217:
804 case 218:
805 case 219:
806 case 220:
807 case 221:
808 case 222:
809 case 223:
810#endif
811invalid_match_distance:
812
813 return; // we already updated state
814#if !HAVE_LABELS_AS_VALUES
815 }
816 }
817#endif
818}
819
820size_t lzvn_decode_buffer(unsigned char *dst, size_t dst_size,
821 const unsigned char *src, size_t src_size) {
822 // Init LZVN decoder state
823 lzvn_decoder_state dstate;
824
825 if (dst_size > OC_COMPRESSION_MAX_LENGTH || src_size > OC_COMPRESSION_MAX_LENGTH) {
826 return 0;
827 }
828
829 memset(&dstate, 0x00, sizeof(dstate));
830 dstate.src = src;
831 dstate.src_end = src + src_size;
832
833 dstate.dst_begin = dst;
834 dstate.dst = dst;
835 dstate.dst_end = dst + dst_size;
836
837 dstate.d_prev = 0;
838 dstate.end_of_stream = 0;
839
840 // Run LZVN decoder
841 lzvn_decode(&dstate);
842
843 // This is how much we decompressed
844 return dstate.dst - dst;
845}
UINT16 width
Definition BmfFile.h:85
#define OC_COMPRESSION_MAX_LENGTH
#define size_t
Definition Ubsan.h:87
#define memset(ptr, c, len)
UINT16 uint16_t
UINT32 uint32_t
UINT64 uint64_t
INT32 int32_t
Definition lzss.h:34
void lzvn_decode(lzvn_decoder_state *state)
Definition lzvn.c:143
LZFSE_INLINE uint32_t load4(const void *ptr)
Definition lzvn.c:86
LZFSE_INLINE void store8(void *ptr, uint64_t data)
Definition lzvn.c:103
LZFSE_INLINE uint64_t load8(const void *ptr)
Definition lzvn.c:92
#define UPDATE_GOOD
Definition lzvn.c:138
#define PTR_LEN_INC(_pointer, _length, _increment)
Definition lzvn.c:133
#define LZFSE_INLINE
Definition lzvn.c:36
LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb, unsigned width)
Definition lzvn.c:109
LZFSE_INLINE void store4(void *ptr, uint32_t data)
Definition lzvn.c:99
LZFSE_INLINE uint16_t load2(const void *ptr)
Definition lzvn.c:80
int32_t lzvn_offset
Definition lzvn.c:43
#define assert(x)
Definition lzvn.c:27
#define lzvn_decode_buffer
Definition lzvn.h:21
UINTN uintmax_t
Definition lzvn.h:46
#define memcpy(Dst, Src, Size)
Definition lzvn.h:57
INT64 int64_t
Definition lzvn.h:43
lzvn_offset d_prev
Definition lzvn.c:72
unsigned char * dst_end
Definition lzvn.c:60
const unsigned char * src
Definition lzvn.c:52
int end_of_stream
Definition lzvn.c:75
unsigned char * dst_current
Definition lzvn.c:62
unsigned char * dst_begin
Definition lzvn.c:59
const unsigned char * src_end
Definition lzvn.c:54
unsigned char * dst
Definition lzvn.c:57