Added DEZ library

This commit is contained in:
Anuken 2018-06-07 17:23:32 -04:00
parent e451cdd519
commit 9aa5460688
6 changed files with 901 additions and 0 deletions

View file

@ -0,0 +1,101 @@
/*
* Copyright (C) 2015 Michael Zucchi
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.anuke.mindustry.net.delta;
/**
 * Sink for the commands that make up a binary delta.
 * <p>
 * Each implementation serialises the commands it receives into one
 * specific file/transfer format.
 */
public interface ByteDeltaEncoder {

    /**
     * Starts a new patch.
     *
     * @param sourceSize length of the source data in bytes
     * @param targetSize length of the target data in bytes
     */
    void init(int sourceSize, int targetSize);

    /**
     * Appends a copy command.
     *
     * @param addr match offset to copy from, as reported by the matcher
     * @param len number of bytes to copy
     */
    void copy(int addr, int len);

    /**
     * Appends an append (literal data) command.
     *
     * @param data array holding the literal bytes
     * @param off index of the first literal byte in {@code data}
     * @param len number of literal bytes
     */
    void add(byte[] data, int off, int len);

    /**
     * Appends a byte-run command.
     *
     * @param b the byte to repeat
     * @param len number of repetitions
     */
    void run(byte b, int len);

    /**
     * Retrieves the patch built so far.
     *
     * @return the encoded patch
     */
    byte[] toPatch();

    /**
     * Drains a matcher into an encoder and returns the resulting patch.
     * <p>
     * Target bytes that lie between two reported matches (and after the last
     * one) are emitted as append commands; matches are emitted as run or copy
     * commands depending on the state the matcher reports.
     *
     * @param matcher supplies the source, the target and the sequence of matches
     * @param enc receives the commands
     * @return the value of {@code enc.toPatch()} once the matcher reports EOF
     */
    static byte[] toDiff(ByteMatcher matcher, ByteDeltaEncoder enc) {
        final byte[] src = matcher.getSource();
        final byte[] dst = matcher.getTarget();

        enc.init(src.length, dst.length);

        // Number of target bytes already covered by emitted commands.
        int emitted = 0;

        for (int kind = matcher.nextMatch(); kind != ByteMatcher.EOF; kind = matcher.nextMatch()) {
            final int matchStart = matcher.getTargetOffset();
            final int matchLen = matcher.getLength();

            // Anything between the previous command and this match is literal data.
            final int gap = matchStart - emitted;
            if (gap != 0) {
                enc.add(dst, emitted, gap);
            }

            if (kind != ByteMatcher.RUN) {
                enc.copy(matcher.getMatchOffset(), matchLen);
            } else {
                enc.run(matcher.getRunByte(), matchLen);
            }

            emitted = matchStart + matchLen;
        }

        // Trailing literal data after the final match.
        final int tail = dst.length - emitted;
        if (tail != 0) {
            enc.add(dst, emitted, tail);
        }

        return enc.toPatch();
    }
}

View file

@ -0,0 +1,109 @@
/*
* Copyright (C) 2015 Michael Zucchi
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.anuke.mindustry.net.delta;
/**
* Common interface for byte matchers.
* <p>
* Byte matchers look for common sub-strings between a source and
* a target byte array and may optionally detect runs of duplicated
* bytes.
*/
public interface ByteMatcher {
public final static int COPY = 0;
public final static int RUN = 1;
public static final int EOF = -1;
/**
* Finds the next match or run.
* <p>
* Note that only matches or byte runs will be indicated. The location
* of non-matching data (i.e. append sequences) must be determined from
* the difference between the last targetOffset, the last length, and the
* current targetOffset.
* </p>
*
* @return the new state.
*/
public int nextMatch();
/**
* Retrieves the current target position.
* <p>
* The position within the target to which the current match refers.
*
* @return
*/
public int getTargetOffset();
/**
* Retrieves the best match location.
* <p>
* If the current state is COPY then this returns a valid location
* of the best match. This should be interpreted
* using {@link #getBlockArray} and {@link #getBlockOffset}.
*
* @return
*/
public int getMatchOffset();
/**
* Retrieves the byte to be run-length encoded.
* <p>
* If the current state is RUN then this returns the corresponding byte to run.
*
* @return
*/
public byte getRunByte();
/**
* Retrieves the current length.
* <p>
* This is the number of bytes to copy for the COPY state or repeat for the RUN state.
*
* @return
*/
public int getLength();
/**
* Retrieves the array containing the current match.
* <p>
* Maps the offset to the correct internal array.
*
* @param offset
* @return
* @see #getBlockOffset
*/
public byte[] getBlockArray(int offset);
/**
* Calculates the offset for the block array.
* <p>
* Maps the match offset to the array from <code>getBlockArray</code>.
*
* @param offset
* @return
* @see #getBlockArray
*/
public int getBlockOffset(int offset);
public byte[] getSource();
public byte[] getTarget();
}

View file

@ -0,0 +1,300 @@
/*
* Copyright (C) 2015 Michael Zucchi
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.anuke.mindustry.net.delta;
import java.util.Arrays;
/**
 * Finds common strings of bytes between a source and target buffer.
 * <p>
 * This is basically an implementation of Bentley &amp; McIlroy's paper
 * ``Data Compression Using Long Common Strings'' applied instead to producing
 * deltas and using a cyclic hash as the fingerprint function.
 * <p>
 * Two other modifications are that back-tracking is not implemented but
 * instead overlapping blocks can be used by setting the step size.
 * And a further refinement is the detection of runs of the same byte which
 * might otherwise pollute the hash tree for certain data.
 */
public class ByteMatcherHash implements ByteMatcher {
    /** Block size: number of bytes hashed per key (at least 3). */
    private final int b;
    /** Shortest match length that is reported as a COPY. */
    private final int shortest;
    private final byte[] source;
    /** Sampling interval used when indexing blocks. */
    private final int sstep;
    private final byte[] target;

    // Incremental hashes
    private final CyclicHash targetHash;
    private final CyclicHash sourceHash;

    // Runtime state
    /** Current scan position in the target. */
    private int ti;
    /** Hash of the target block starting at {@code ti}. */
    private int thash;
    /** Target position at which the next call to nextMatch() resumes. */
    private int skipTo;
    /** Next target position to be indexed by addAll(). */
    private int targetAvailable;

    // Public state
    private int bestLength;
    private int bestOffset;
    private int targetOffset;
    private byte runByte;

    /**
     * Inline hash+array table.
     * <p>
     * All values which hash the same are appended to the same chain.
     * <p>
     * Element 0 of each chain holds the next insertion index (and therefore
     * the exclusive end of the used part); the values follow from index 1.
     */
    private final int hashMask;
    private final int[][] hashValues;

    /**
     * Creates and initialises a new byte matcher.
     * <p>
     * This is a single-use object.
     * <p>
     * A step size of 1 produces the best output but requires the most memory and run time.
     *
     * @param b Sets block size, which is the number of bytes hashed per key (&gt;=3).
     * Smaller values are raised to 3.
     * @param shortest shortest string considered for a copy. Typically 4 bytes but dependent on the encoder used and
     * the value of b. Must be at least 1.
     * @param source Source array.
     * @param sstep Sets the step size which is the interval of sampling of the source. Must be at least 1.
     * @param target Target array.
     * @throws IllegalArgumentException if {@code sstep} or {@code shortest} is less than 1.
     */
    public ByteMatcherHash(int b, int shortest, byte[] source, int sstep, byte[] target) {
        // A zero step would divide by zero below and a negative one would walk
        // the index backwards out of the array in addAll().
        if (sstep < 1)
            throw new IllegalArgumentException("sstep must be >= 1: " + sstep);
        // With shortest <= 0 nextMatch() would report a zero-length COPY forever
        // and never reach EOF.
        if (shortest < 1)
            throw new IllegalArgumentException("shortest must be >= 1: " + shortest);

        b = Math.max(b, 3);

        // This may need tuning.
        int logN = 31 - Integer.numberOfLeadingZeros((source.length + target.length) / sstep);
        int size = 1 << Math.max(14, logN - 5);

        hashMask = size - 1;
        hashValues = new int[size][];
        targetHash = new CyclicHash(b);
        sourceHash = new CyclicHash(b);

        this.b = b;
        this.shortest = shortest;
        this.source = source;
        this.sstep = sstep;
        this.target = target;

        // Index the whole source up front.
        addAll(source, source.length, 0, 0);

        if (target.length >= b)
            this.thash = targetHash.init(target, 0);
    }

    /**
     * Checks for run of 3 bytes.
     * <p>
     * Boundaries are not checked.
     *
     * @param s array to inspect
     * @param pos index of the first of the three bytes
     * @return true if the three bytes starting at {@code pos} are equal.
     */
    private boolean isRun(byte[] s, int pos) {
        byte v = s[pos];

        return v == s[pos + 1] && v == s[pos + 2];
    }

    /**
     * Adds block hashes of an array to the hash table.
     * <p>
     * Blocks starting at {@code pos} up to and including {@code limit - b} are
     * visited. With a step of 1 the hash is rolled from one position to the
     * next, and the rolling state is kept between calls, so successive calls
     * must continue from the position previously returned. Position 0 is
     * added without the run check; every other block that starts with a
     * 3-byte run is skipped.
     *
     * @param s array to index
     * @param limit exclusive end of the data that may be hashed
     * @param pos first block position to visit
     * @param off value added to each position before it is stored
     * @return the first position not visited.
     */
    private int addAll(byte[] s, int limit, int pos, int off) {
        if (sstep == 1) {
            if (pos == 0 && limit >= b) {
                add(sourceHash.init(s, 0), off);
                pos = 1;
            }
            while (pos <= limit - b) {
                int hash = sourceHash.update(s[pos - 1], s[pos - 1 + b]);

                if (!isRun(s, pos))
                    add(hash, pos + off);
                pos += 1;
            }
        } else {
            while (pos <= limit - b) {
                if (!isRun(s, pos))
                    add(sourceHash.init(s, pos), pos + off);
                pos += sstep;
            }
        }
        return pos;
    }

    /**
     * Appends a value to the chain selected by a hash.
     *
     * @param hash block hash; only the bits covered by the table mask are used
     * @param value value to store
     */
    private void add(int hash, int value) {
        int j = hash & hashMask;
        int[] vs = hashValues[j];

        if (vs == null) {
            hashValues[j] = vs = new int[4];
            // Slot 0 is the insertion index; the first value goes in slot 1.
            vs[0] = 2;
            vs[1] = value;
        } else {
            int i = vs[0];

            if (i >= vs.length)
                hashValues[j] = vs = Arrays.copyOf(vs, vs.length * 2);
            vs[i++] = value;
            vs[0] = i;
        }
    }

    /**
     * Finds the length of similarity between the two sub-arrays.
     * <p>
     * A comparison that starts in the source never runs past the end of the source.
     *
     * @param soff source offset starting location, locations at or above source.length refer to the target buffer.
     * @param toff target offset starting location
     * @return how many bytes are sequentially identical.
     */
    private int matchLength(int soff, int toff) {
        if (soff < source.length) {
            int limit = Math.min(source.length - soff, target.length - toff);

            for (int i = 0; i < limit; i++)
                if (source[soff + i] != target[toff + i])
                    return i;
            return limit;
        } else {
            soff -= source.length;

            int limit = Math.min(target.length - soff, target.length - toff);

            for (int i = 0; i < limit; i++)
                if (target[soff + i] != target[toff + i])
                    return i;
            return limit;
        }
    }

    @Override
    public byte[] getSource() {
        return source;
    }

    @Override
    public byte[] getTarget() {
        return target;
    }

    @Override
    public int getMatchOffset() {
        return bestOffset;
    }

    @Override
    public int getTargetOffset() {
        return targetOffset;
    }

    @Override
    public int getLength() {
        return bestLength;
    }

    @Override
    public byte getRunByte() {
        return runByte;
    }

    /**
     * Scans the target forward for the next byte run or the next match of at
     * least {@code shortest} bytes.
     *
     * @return RUN, COPY or EOF.
     */
    @Override
    public int nextMatch() {
        bestLength = 0;
        bestOffset = 0;

        // Reset thash on seek.
        if (skipTo != ti) {
            if (skipTo <= target.length - b)
                thash = targetHash.init(target, skipTo);
            ti = skipTo;
        }

        while (bestLength < shortest && ti <= target.length - b) {
            // Short circuit test for byte-runs.
            if (isRun(target, ti)) {
                byte b0 = target[ti];
                int j = ti + 3;

                while (j < target.length && target[j] == b0)
                    j++;

                targetOffset = ti;
                bestLength = j - ti;
                runByte = b0;
                skipTo = j;

                return RUN;
            }

            // Include any of the target buffer which has been decoded to this
            // point: with this limit only blocks starting before ti are indexed.
            targetAvailable = addAll(target, ti + b - 1, targetAvailable, source.length);

            // Check every candidate in the chain for the longest match.
            int[] soffs = hashValues[thash & hashMask];

            if (soffs != null) {
                int len = soffs[0];

                for (int i = 1; i < len; i++) {
                    int soff = soffs[i];
                    int length = matchLength(soff, ti);

                    if (length > bestLength) {
                        bestLength = length;
                        bestOffset = soff;
                    }
                }
            }

            // Advance. thash is always the next block to examine.
            targetOffset = ti;
            ti += 1;
            if (ti <= target.length - b)
                thash = targetHash.update(target[ti - 1], target[ti - 1 + b]);
        }

        if (bestLength >= shortest) {
            skipTo = targetOffset + bestLength;
            return COPY;
        } else
            return EOF;
    }

    @Override
    public byte[] getBlockArray(int offset) {
        return (offset < source.length) ? source : target;
    }

    @Override
    public int getBlockOffset(int offset) {
        return (offset < source.length) ? offset : offset - source.length;
    }
}

View file

@ -0,0 +1,90 @@
/*
* Copyright (C) 2015 Michael Zucchi
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.anuke.mindustry.net.delta;
import java.util.Random;
import static java.lang.Integer.rotateLeft;
/**
 * Cyclic polynomial rolling hash.
 * <p>
 * Hashes a fixed-length window of bytes and can slide that window forward
 * one byte at a time.
 * <p>
 * Each input byte is mapped through a table of pseudo-random integers; the
 * quality of the hash depends on the randomness of that table.
 */
public class CyclicHash {
    /** Per-byte substitution table, generated from a fixed seed. */
    private static final int[] TABLE = buildTable();

    /** Window length in bytes. */
    private final int window;
    /** Rotation that the oldest byte in the window has accumulated. */
    private final int oldestRotation;
    /** Hash of the current window. */
    private int hash;

    /**
     * Fills the substitution table from a fixed-seed generator.
     */
    private static int[] buildTable() {
        // keyboard bashed the results unvalidated.
        Random generator = new Random(97435);
        int[] table = new int[256];
        int filled = 0;
        while (filled < table.length) {
            table[filled++] = generator.nextInt();
        }
        return table;
    }

    /**
     * Creates a cyclic hash.
     *
     * @param b number of bytes covered by the hash
     */
    public CyclicHash(int b) {
        this.window = b;
        this.oldestRotation = ((b - 1) * 9) & 31;
    }

    /**
     * Initialises the hash.
     * <p>
     * Hashes the block of data that starts at the given location.
     *
     * @param data array to read from
     * @param off index of the first byte of the block
     * @return the hash of the block
     */
    public int init(byte[] data, int off) {
        hash = 0;
        int index = off;
        int remaining = window;
        while (remaining-- > 0) {
            int substituted = TABLE[data[index++] & 0xff];
            hash = Integer.rotateLeft(hash, 9) ^ substituted;
        }
        return hash;
    }

    /**
     * Updates the hash incrementally.
     * <p>
     * Slides the window forward by one location.
     *
     * @param leave the byte leaving. Must match the oldest byte included in the hash value.
     * @param enter the byte entering.
     * @return the hash of the shifted window
     */
    public int update(byte leave, byte enter) {
        // Cancel the oldest byte's contribution, then shift and mix in the new byte.
        final int withoutOldest = hash ^ Integer.rotateLeft(TABLE[leave & 0xff], oldestRotation);
        hash = Integer.rotateLeft(withoutOldest, 9) ^ TABLE[enter & 0xff];
        return hash;
    }
}

View file

@ -0,0 +1,140 @@
/*
* Copyright (C) 2015 Michael Zucchi
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.anuke.mindustry.net.delta;
import java.io.IOException;
/**
 * An in-memory DeltaZ-1 decoder.
 * <p>
 * Transforms a source and patch into a target.
 */
public class DEZDecoder {
    private final byte[] patch;
    private final byte[] source;
    /** Read position within the patch. */
    private int pi;

    /**
     * Creates a decoder.
     *
     * @param src the source data the patch was created against
     * @param patch the patch to apply
     */
    public DEZDecoder(byte[] src, byte[] patch) {
        this.patch = patch;
        this.source = src;
    }

    /**
     * Reads a variable-length integer at the current patch position.
     * <p>
     * Seven bits are taken from each byte, most significant group first; the
     * top bit of a byte says that another byte follows. At most 5 bytes are read.
     *
     * @return the decoded value
     * @throws IOException if the continue bit is still set when the 5 byte
     * limit or the end of the patch is reached
     */
    private int decodeInt() throws IOException {
        int v = 0;
        byte b;
        int limit = Math.min(patch.length, pi + 5);

        do {
            b = patch[pi++];
            v = (v << 7) | (b & 0x7f);
        } while (pi < limit && (b & 0x80) != 0);

        // Leaving the loop with the continue bit set means the integer was cut
        // short; returning the partial value would silently decode wrong data.
        if ((b & 0x80) != 0)
            throw new IOException("Malformed integer in patch");

        return v;
    }

    /**
     * Reads the length of an ADD or RUN command.
     * <p>
     * On entry pi points to the opcode, which is also in 'op'. The low 5 bits
     * of the opcode start the length; while the top bit of the last byte read
     * is set, a further byte contributes 7 more bits.
     *
     * @param op the opcode byte
     * @return the decoded length field
     */
    private int decodeLength(int op) {
        int length = op & 0x1f;

        pi++;
        while ((op & 0x80) != 0) {
            op = patch[pi++];
            length = (length << 7) | (op & 0x7f);
        }
        return length;
    }

    /**
     * Recreates the original target data from the source and patch.
     *
     * @return the decoded target
     * @throws IOException if the magic or flags are not recognised, the source
     * size recorded in the patch differs from the source length, the recorded
     * target size is negative, or an integer in the patch is malformed
     * @throws ArrayIndexOutOfBoundsException if the patch is truncated or
     * refers to data outside the source, target or patch
     */
    public byte[] decode() throws IOException, ArrayIndexOutOfBoundsException {
        byte[] target;
        int ti = 0;

        pi = 0;

        // 'decode' magic
        for (int i = 0; i < DEZEncoder.MAGIC.length; i++)
            if (patch[i] != DEZEncoder.MAGIC[i])
                throw new IOException("Invalid magic");
        pi += DEZEncoder.MAGIC.length;

        // 'decode' flags
        if (patch[pi] != 0)
            throw new IOException("Unknown flags");
        pi += 1;

        // get sizes
        int sourceSize = decodeInt();
        int targetSize = decodeInt();

        if (sourceSize != source.length)
            throw new IOException("Patch/source size mismatch");
        // Five 7-bit groups can overflow into the sign bit; report that as a
        // bad patch instead of letting the allocation below fail.
        if (targetSize < 0)
            throw new IOException("Invalid target size");

        target = new byte[targetSize];

        // Decode loop.
        // Since java will check the array accesses anyway, don't clutter the code with our own.
        while (ti < targetSize) {
            byte op = patch[pi];

            if ((op & 0x40) == 0) {
                // COPY: the opcode byte is itself the start of the length integer.
                int length = decodeInt();
                int addr = decodeInt();

                if (addr < sourceSize) {
                    for (int i = 0; i < length; i++)
                        target[ti++] = source[addr + i];
                } else {
                    // Addresses past the source refer to already decoded target
                    // data. Copy byte by byte: the ranges may overlap.
                    for (int i = 0; i < length; i++)
                        target[ti++] = target[addr - sourceSize + i];
                }
            } else if ((op & 0x20) == 0) {
                // ADD: the stored length is one less than the byte count.
                int length = decodeLength(op);

                for (int i = 0; i <= length; i++)
                    target[ti++] = patch[pi++];
            } else {
                // RUN: the stored length is one less than the repeat count.
                int length = decodeLength(op);
                byte r = patch[pi++];

                for (int i = 0; i <= length; i++)
                    target[ti++] = r;
            }
        }

        return target;
    }
}

View file

@ -0,0 +1,161 @@
/*
* Copyright (C) 2015 Michael Zucchi
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.anuke.mindustry.net.delta;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
* 'DeltaZ-1' format encoder.
* <p>
* Encoder for very simple binary delta format.
* <p>
* <h3>Header</h3>
* <pre>
* magic: 'D' 'E' 'Z' '1'
* flags: one byte
* source size: one integer
* target size: one integer
* instructions follow directly
* ?? no epilogue defined ??
* </pre>
* <p>
* Integers are encoded as a compacted big-endian sequence
* with 7 bits per byte. Leading zero septets are discarded.
* The MSB of each byte is a continue bit which indicates
* another 7 bits are to be read.
* <p>
* <h3>Instruction stream</h3>
* <pre>
* x0000000 - reserved
* 00XXXXXX CAAAAAAA* - copy + 6 bit length + address
* 10XXXXXX CXXXXXXX* CAAAAAAA* - copy + extended length + address
* 010XXXXX DDDDDDDD* - add + 5 bit length - 1 + sequence
* 110XXXXX CXXXXXXX* DDDDDDDD* - add + extended length - 1 + sequence
* 011XXXXX RRRRRRRR - run + 5 bit count - 1 + byte
* 111XXXXX CXXXXXXX* RRRRRRRR - run + extended count - 1 + byte
* </pre>
* <p>
* Opcodes include a length encoded as an integer.
* <dl>
* <dt>COPY
* <dd>The opcode/length followed by an absolute address of the source of
* the copy. COPY is decodeable directly as an integer. A length of 0 is reserved.
* <p>
* The address covers the range of the source buffer concatenated with as much
* of the target buffer as has currently been decoded. The address+length will
* not span buffers.
* <dt>ADD
* <dd>The opcode/length followed by (length+1) bytes of data
* to copy to the current output location.
* <dt>RUN
* <dd>The opcode/length followed by a byte to be duplicated
* into the current output stream (length+1) times.
* </dl>
*
*/
public class DEZEncoder implements ByteDeltaEncoder {
private final ByteArrayOutputStream patch = new ByteArrayOutputStream();
private final byte[] work = new byte[6];
public static final byte[] MAGIC = {'D', 'E', 'Z', '1'};
public static final int COPY = 0x00;
public static final int COPY_EXT = 0x80;
public static final int ADD = 0x40;
public static final int ADD_EXT = 0xc0;
public static final int RUN = 0x60;
public static final int RUN_EXT = 0xe0;
public void init(int sourceSize, int targetSize) {
try {
patch.reset();
patch.write(MAGIC);
// some flags
patch.write(0);
encodeInt(sourceSize);
encodeInt(targetSize);
} catch (IOException ex) {
ex.printStackTrace();
}
}
/**
* Encode an opcode + length.
*
* @param op opcode. extend bit is added automatically.
* @param max maximum size of value that can fit in the first byte inclusive. Leave room for opcode bits.
* @param len length to encode.
*/
private void encodeOp(int op, int max, int len) {
if (len <= max) {
patch.write((byte) (len | op));
} else {
int i = work.length;
int cont = 0;
while (len > max) {
work[--i] = (byte) ((len & 0x7f) | cont);
len >>= 7;
cont = 0x80;
}
work[--i] = (byte) (len | 0x80 | op);
patch.write(work, i, work.length - i);
}
}
/**
* Encodes an integer.
* <p>
* Format is big-endian order encoded as:
* <p>
* CXXXXXXX
* <p>
* Where C is the continue bit.
*
*/
void encodeInt(int addr) {
int i = work.length;
int cont = 0;
while (addr > 0x7f) {
work[--i] = (byte) ((addr & 0x7f) | cont);
addr >>= 7;
cont = 0x80;
}
work[--i] = (byte) (addr | cont);
patch.write(work, i, work.length - i);
}
public void copy(int addr, int len) {
encodeOp(COPY, 0x3f, len);
encodeInt(addr);
}
public void add(byte[] data, int off, int len) {
encodeOp(ADD, 0x1f, len - 1);
patch.write(data, off, len);
}
public void run(byte b, int len) {
encodeOp(RUN, 0x1f, len - 1);
patch.write(b);
}
public byte[] toPatch() {
return patch.toByteArray();
}
}