246 lines
8.6 KiB
C#
246 lines
8.6 KiB
C#
using System.Buffers;
|
|
using System.Runtime.CompilerServices;
|
|
|
|
namespace AyCode.Core.Compression;
|
|
|
|
/// <summary>
/// Pure managed LZ4 decompressor. Works on all platforms including WASM.
/// Implements LZ4 block format decompression, plus a chunked "BlockArray" container
/// made of a chunk count followed by size-prefixed LZ4 blocks.
/// </summary>
/// <remarks>
/// Every length and offset read from the input is validated before it is used to index a buffer,
/// so malformed data is reported as <see cref="InvalidDataException"/>.
/// </remarks>
public static class Lz4Decompressor
{
    /// <summary>Bias added to the 4-bit match-length field of a token.</summary>
    private const int MinMatch = 4;

    /// <summary>Width of the match-length field (low nibble of the token).</summary>
    private const int MatchLengthBits = 4;

    /// <summary>Width of the literal-length field (high nibble of the token).</summary>
    private const int LiteralLengthBits = 4;

    /// <summary>Literal-length field value that signals extension bytes follow.</summary>
    private const int RunMask = (1 << LiteralLengthBits) - 1;

    /// <summary>Match-length field value that signals extension bytes follow.</summary>
    private const int MatchMask = (1 << MatchLengthBits) - 1;

    /// <summary>BlockArray chunk header size: original size and compressed size, 4 bytes each.</summary>
    private const int ChunkHeaderSize = 2 * sizeof(int);

    /// <summary>
    /// Decompresses LZ4 block format data into a newly allocated array.
    /// </summary>
    /// <param name="source">Compressed data.</param>
    /// <param name="originalSize">Expected size of decompressed data.</param>
    /// <returns>
    /// Decompressed data. An empty <paramref name="source"/> yields an empty array
    /// without looking at <paramref name="originalSize"/>.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="source"/> is not empty and <paramref name="originalSize"/> is zero or negative.
    /// </exception>
    /// <exception cref="InvalidDataException">
    /// The data is malformed, or it does not decompress to exactly <paramref name="originalSize"/> bytes.
    /// </exception>
    public static byte[] Decompress(ReadOnlySpan<byte> source, int originalSize)
    {
        if (source.IsEmpty)
        {
            return [];
        }

        if (originalSize <= 0)
        {
            throw new ArgumentException("Original size must be positive.", nameof(originalSize));
        }

        var output = new byte[originalSize];
        var decompressedLength = DecompressCore(source, output);

        if (decompressedLength != originalSize)
        {
            throw new InvalidDataException($"Decompressed size mismatch. Expected {originalSize}, got {decompressedLength}.");
        }

        return output;
    }

    /// <summary>
    /// Decompresses LZ4 block format data into destination buffer.
    /// </summary>
    /// <param name="source">Compressed data.</param>
    /// <param name="destination">Destination buffer for decompressed data.</param>
    /// <returns>Number of bytes written to destination (0 for an empty <paramref name="source"/>).</returns>
    /// <exception cref="InvalidDataException">
    /// The data is malformed or does not fit into <paramref name="destination"/>.
    /// </exception>
    public static int Decompress(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        return source.IsEmpty ? 0 : DecompressCore(source, destination);
    }

    /// <summary>
    /// Decompresses LZ4 BlockArray format data (chunked compression).
    /// </summary>
    /// <remarks>
    /// Layout, all integers little-endian Int32: chunk count, then for each chunk its original size,
    /// its compressed size and that many bytes of LZ4 block data.
    /// </remarks>
    /// <param name="source">Compressed data with chunk headers.</param>
    /// <returns>
    /// Decompressed data. An empty array is returned when <paramref name="source"/> is shorter than
    /// four bytes or declares a chunk count of zero or less.
    /// </returns>
    /// <exception cref="InvalidDataException">
    /// A chunk header or payload is truncated, a declared size is negative, the declared total size
    /// exceeds <see cref="int.MaxValue"/>, or a chunk does not decompress to its declared size.
    /// </exception>
    public static byte[] DecompressBlockArray(ReadOnlySpan<byte> source)
    {
        const int firstChunkIndex = sizeof(int);

        if (source.Length < firstChunkIndex)
        {
            return [];
        }

        var numChunks = ReadInt32LittleEndian(source, 0);
        if (numChunks <= 0)
        {
            return [];
        }

        // Pass 1: validate every header and add up the output size. Comparisons subtract from
        // source.Length instead of adding to the index, so they cannot overflow, and rejecting
        // negative sizes guarantees the scan advances by at least one header per iteration.
        long totalOriginalSize = 0;
        var scanIndex = firstChunkIndex;
        for (var i = 0; i < numChunks; i++)
        {
            if (source.Length - scanIndex < ChunkHeaderSize)
            {
                throw new InvalidDataException("Invalid BlockArray format: truncated header.");
            }

            var originalChunkSize = ReadInt32LittleEndian(source, scanIndex);
            var compressedChunkSize = ReadInt32LittleEndian(source, scanIndex + sizeof(int));
            scanIndex += ChunkHeaderSize;

            if (originalChunkSize < 0 || compressedChunkSize < 0)
            {
                throw new InvalidDataException("Invalid BlockArray format: negative chunk size.");
            }

            if (compressedChunkSize > source.Length - scanIndex)
            {
                throw new InvalidDataException("Invalid BlockArray format: truncated chunk data.");
            }

            totalOriginalSize += originalChunkSize;
            if (totalOriginalSize > int.MaxValue)
            {
                throw new InvalidDataException("Invalid BlockArray format: total size is too large.");
            }

            scanIndex += compressedChunkSize;
        }

        var output = new byte[(int)totalOriginalSize];
        var outputIndex = 0;
        var srcIndex = firstChunkIndex;

        // Pass 2: the headers were validated above, so the slices below are always in range.
        for (var i = 0; i < numChunks; i++)
        {
            var originalChunkSize = ReadInt32LittleEndian(source, srcIndex);
            var compressedChunkSize = ReadInt32LittleEndian(source, srcIndex + sizeof(int));
            srcIndex += ChunkHeaderSize;

            var decompressedLength = DecompressCore(
                source.Slice(srcIndex, compressedChunkSize),
                output.AsSpan(outputIndex, originalChunkSize));

            if (decompressedLength != originalChunkSize)
            {
                throw new InvalidDataException($"Chunk decompression failed. Expected {originalChunkSize}, got {decompressedLength}.");
            }

            outputIndex += originalChunkSize;
            srcIndex += compressedChunkSize;
        }

        return output;
    }

    /// <summary>
    /// Tries to decompress LZ4 block format data.
    /// </summary>
    /// <param name="source">Compressed data.</param>
    /// <param name="destination">Destination buffer for decompressed data.</param>
    /// <param name="bytesWritten">Number of bytes written to destination; 0 when the call fails.</param>
    /// <returns>
    /// True if decompression succeeded (an empty <paramref name="source"/> counts as success),
    /// false otherwise. On failure <paramref name="destination"/> may already hold partial output.
    /// </returns>
    public static bool TryDecompress(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesWritten)
    {
        bytesWritten = 0;

        if (source.IsEmpty)
        {
            return true;
        }

        try
        {
            bytesWritten = DecompressCore(source, destination);
            return true;
        }
        catch (Exception ex) when (ex is InvalidDataException or ArgumentException or IndexOutOfRangeException)
        {
            // Only failures caused by the input data are reported as "false";
            // unrelated runtime failures are allowed to propagate.
            return false;
        }
    }

    /// <summary>
    /// Decodes one LZ4 block: a series of sequences, each a token, optional literals and an optional match.
    /// </summary>
    /// <param name="source">Compressed block.</param>
    /// <param name="destination">Buffer receiving the decompressed bytes.</param>
    /// <returns>Number of bytes written to <paramref name="destination"/>.</returns>
    /// <exception cref="InvalidDataException">
    /// The block is truncated, contains an invalid offset, or needs more room than
    /// <paramref name="destination"/> provides.
    /// </exception>
    private static int DecompressCore(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        var srcIndex = 0;
        var dstIndex = 0;
        var srcLength = source.Length;
        var dstLength = destination.Length;

        while (srcIndex < srcLength)
        {
            // Token: high nibble = literal length, low nibble = match length - MinMatch.
            var token = source[srcIndex++];

            // Lengths are kept as long so that a long run of 255 extension bytes cannot wrap
            // around and slip past the bounds checks below.
            long literalLength = token >> MatchLengthBits;
            if (literalLength == RunMask)
            {
                literalLength += ReadLengthExtension(source, ref srcIndex, "Unexpected end of input while reading literal length.");
            }

            if (literalLength > 0)
            {
                if (literalLength > srcLength - srcIndex)
                {
                    throw new InvalidDataException("Unexpected end of input while reading literals.");
                }

                if (literalLength > dstLength - dstIndex)
                {
                    throw new InvalidDataException("Output buffer overflow while writing literals.");
                }

                var literalCount = (int)literalLength;
                source.Slice(srcIndex, literalCount).CopyTo(destination.Slice(dstIndex));
                srcIndex += literalCount;
                dstIndex += literalCount;
            }

            // The final sequence carries literals only: no offset or match follows.
            if (srcIndex >= srcLength)
            {
                break;
            }

            if (srcLength - srcIndex < 2)
            {
                throw new InvalidDataException("Unexpected end of input while reading offset.");
            }

            // 16-bit little-endian distance back from the current output position.
            var offset = source[srcIndex] | (source[srcIndex + 1] << 8);
            srcIndex += 2;

            if (offset == 0)
            {
                throw new InvalidDataException("Invalid offset: 0.");
            }

            long matchLength = (token & MatchMask) + MinMatch;
            if ((token & MatchMask) == MatchMask)
            {
                matchLength += ReadLengthExtension(source, ref srcIndex, "Unexpected end of input while reading match length.");
            }

            var matchStart = dstIndex - offset;
            if (matchStart < 0)
            {
                throw new InvalidDataException($"Invalid match offset: {offset} at position {dstIndex}.");
            }

            if (matchLength > dstLength - dstIndex)
            {
                throw new InvalidDataException("Output buffer overflow while writing match.");
            }

            var matchCount = (int)matchLength;
            CopyMatch(destination, dstIndex, matchStart, matchCount);
            dstIndex += matchCount;
        }

        return dstIndex;
    }

    /// <summary>
    /// Reads the extension bytes of a literal or match length: bytes are summed until one
    /// smaller than 255 is read.
    /// </summary>
    /// <param name="source">Compressed block.</param>
    /// <param name="srcIndex">Read position; advanced past the extension bytes.</param>
    /// <param name="truncatedMessage">Exception message used when the input ends mid-length.</param>
    /// <returns>Sum of the extension bytes.</returns>
    /// <exception cref="InvalidDataException">The input ends before the terminating byte.</exception>
    private static long ReadLengthExtension(ReadOnlySpan<byte> source, ref int srcIndex, string truncatedMessage)
    {
        long total = 0;
        byte next;
        do
        {
            if (srcIndex >= source.Length)
            {
                throw new InvalidDataException(truncatedMessage);
            }

            next = source[srcIndex++];
            total += next;
        } while (next == byte.MaxValue);

        return total;
    }

    /// <summary>
    /// Copies <paramref name="matchLength"/> bytes from <paramref name="matchStart"/> to
    /// <paramref name="dstIndex"/> within the same buffer, repeating the source pattern when
    /// the two ranges overlap.
    /// </summary>
    /// <remarks>
    /// Callers must pass <paramref name="matchStart"/> strictly below <paramref name="dstIndex"/>
    /// and ensure the destination range fits in <paramref name="buffer"/>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void CopyMatch(Span<byte> buffer, int dstIndex, int matchStart, int matchLength)
    {
        var writeIndex = dstIndex;
        var remaining = matchLength;

        // Everything in [matchStart, writeIndex) is already final output, so up to that many bytes
        // can be block-copied at once. A non-overlapping match finishes in a single pass; for an
        // overlapping one the available run grows after every pass, and because it stays a multiple
        // of the offset until the last pass, the repeated pattern remains aligned.
        while (remaining > 0)
        {
            var chunk = Math.Min(remaining, writeIndex - matchStart);
            buffer.Slice(matchStart, chunk).CopyTo(buffer.Slice(writeIndex, chunk));
            writeIndex += chunk;
            remaining -= chunk;
        }
    }

    /// <summary>
    /// Reads a little-endian Int32 from <paramref name="data"/> starting at <paramref name="index"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ReadInt32LittleEndian(ReadOnlySpan<byte> data, int index)
    {
        return System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(data.Slice(index, sizeof(int)));
    }
}
|