AyCode.Core/AyCode.Core/Compression/Lz4Decompressor.cs

246 lines
8.6 KiB
C#

using System.Buffers;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Compression;
/// <summary>
/// Pure managed LZ4 decompressor. Works on all platforms including WASM.
/// Implements LZ4 block format decompression.
/// </summary>
/// <remarks>
/// Compressed input is treated as untrusted: truncated or malformed streams are reported as
/// <see cref="InvalidDataException"/> rather than surfacing as index/argument exceptions.
/// </remarks>
public static class Lz4Decompressor
{
    private const int MinMatch = 4;
    private const int MatchLengthBits = 4;
    private const int LiteralLengthBits = 4;
    private const int RunMask = (1 << LiteralLengthBits) - 1;
    private const int MatchMask = (1 << MatchLengthBits) - 1;

    // Each BlockArray chunk is prefixed by two little-endian Int32 values:
    // the original (decompressed) size followed by the compressed size.
    private const int ChunkHeaderSize = 2 * sizeof(int);

    /// <summary>
    /// Decompresses LZ4 block format data.
    /// </summary>
    /// <param name="source">Compressed data.</param>
    /// <param name="originalSize">Expected size of decompressed data.</param>
    /// <returns>Decompressed data. An empty array when <paramref name="source"/> is empty.</returns>
    /// <exception cref="ArgumentException"><paramref name="originalSize"/> is zero or negative while <paramref name="source"/> is not empty.</exception>
    /// <exception cref="InvalidDataException">The data is malformed or does not decompress to exactly <paramref name="originalSize"/> bytes.</exception>
    public static byte[] Decompress(ReadOnlySpan<byte> source, int originalSize)
    {
        // An empty input short-circuits before originalSize is validated.
        if (source.Length == 0)
        {
            return [];
        }

        if (originalSize <= 0)
        {
            throw new ArgumentException("Original size must be positive.", nameof(originalSize));
        }

        var output = new byte[originalSize];
        var decompressedLength = DecompressCore(source, output);
        if (decompressedLength != originalSize)
        {
            throw new InvalidDataException($"Decompressed size mismatch. Expected {originalSize}, got {decompressedLength}.");
        }

        return output;
    }

    /// <summary>
    /// Decompresses LZ4 block format data into destination buffer.
    /// </summary>
    /// <param name="source">Compressed data.</param>
    /// <param name="destination">Destination buffer for decompressed data.</param>
    /// <returns>Number of bytes written to destination (0 when <paramref name="source"/> is empty).</returns>
    /// <exception cref="InvalidDataException">The data is malformed or does not fit into <paramref name="destination"/>.</exception>
    public static int Decompress(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        if (source.Length == 0)
        {
            return 0;
        }

        return DecompressCore(source, destination);
    }

    /// <summary>
    /// Decompresses LZ4 BlockArray format data (chunked compression).
    /// </summary>
    /// <remarks>
    /// Layout read by this method: a little-endian Int32 chunk count, then for every chunk a
    /// little-endian Int32 original size, a little-endian Int32 compressed size and the
    /// compressed bytes of that chunk.
    /// </remarks>
    /// <param name="source">Compressed data with chunk headers.</param>
    /// <returns>
    /// Decompressed data. An empty array when <paramref name="source"/> is shorter than four bytes
    /// or declares a chunk count of zero or less.
    /// </returns>
    /// <exception cref="InvalidDataException">
    /// A chunk header or payload is truncated, a declared size is negative, the combined size
    /// is not representable, or a chunk does not decompress to its declared size.
    /// </exception>
    public static byte[] DecompressBlockArray(ReadOnlySpan<byte> source)
    {
        if (source.Length < sizeof(int))
        {
            return [];
        }

        var numChunks = ReadInt32LittleEndian(source, 0);
        if (numChunks <= 0)
        {
            return [];
        }

        const int firstChunkIndex = sizeof(int);

        // Pass 1: validate every chunk header against the input and compute the total output
        // size. All comparisons are made against the remaining length so that hostile sizes
        // cannot overflow the index arithmetic.
        long totalOriginalSize = 0;
        var scanIndex = firstChunkIndex;
        for (var i = 0; i < numChunks; i++)
        {
            if (source.Length - scanIndex < ChunkHeaderSize)
            {
                throw new InvalidDataException("Invalid BlockArray format: truncated header.");
            }

            var originalChunkSize = ReadInt32LittleEndian(source, scanIndex);
            var compressedChunkSize = ReadInt32LittleEndian(source, scanIndex + sizeof(int));
            scanIndex += ChunkHeaderSize;

            if (originalChunkSize < 0 || compressedChunkSize < 0)
            {
                throw new InvalidDataException("Invalid BlockArray format: negative chunk size.");
            }

            if (compressedChunkSize > source.Length - scanIndex)
            {
                throw new InvalidDataException("Invalid BlockArray format: truncated chunk data.");
            }

            totalOriginalSize += originalChunkSize;
            if (totalOriginalSize > int.MaxValue)
            {
                throw new InvalidDataException("Invalid BlockArray format: total size is too large.");
            }

            scanIndex += compressedChunkSize;
        }

        var output = new byte[(int)totalOriginalSize];
        var outputIndex = 0;
        var srcIndex = firstChunkIndex;

        // Pass 2: decompress each chunk into its slot of the output. The headers were validated
        // above, so the slices below cannot go out of range.
        for (var i = 0; i < numChunks; i++)
        {
            var originalChunkSize = ReadInt32LittleEndian(source, srcIndex);
            var compressedChunkSize = ReadInt32LittleEndian(source, srcIndex + sizeof(int));
            srcIndex += ChunkHeaderSize;

            var compressedChunk = source.Slice(srcIndex, compressedChunkSize);
            var decompressedLength = DecompressCore(compressedChunk, output.AsSpan(outputIndex, originalChunkSize));
            if (decompressedLength != originalChunkSize)
            {
                throw new InvalidDataException($"Chunk decompression failed. Expected {originalChunkSize}, got {decompressedLength}.");
            }

            outputIndex += originalChunkSize;
            srcIndex += compressedChunkSize;
        }

        return output;
    }

    /// <summary>
    /// Tries to decompress LZ4 block format data.
    /// </summary>
    /// <param name="source">Compressed data.</param>
    /// <param name="destination">Destination buffer for decompressed data.</param>
    /// <param name="bytesWritten">
    /// Number of bytes written to destination. Set to 0 on failure, even though
    /// <paramref name="destination"/> may already have been partially overwritten.
    /// </param>
    /// <returns>True if decompression succeeded, false otherwise.</returns>
    public static bool TryDecompress(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesWritten)
    {
        bytesWritten = 0;
        if (source.Length == 0)
        {
            return true;
        }

        try
        {
            bytesWritten = DecompressCore(source, destination);
            return true;
        }
        catch (InvalidDataException)
        {
            // DecompressCore reports every malformed-input condition as InvalidDataException;
            // anything else would be a genuine bug and must not be swallowed here.
            return false;
        }
    }

    /// <summary>
    /// Decodes a single LZ4 block from <paramref name="source"/> into <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">One compressed LZ4 block.</param>
    /// <param name="destination">Buffer receiving the decoded bytes.</param>
    /// <returns>Number of bytes written to <paramref name="destination"/>.</returns>
    /// <exception cref="InvalidDataException">The block is truncated, malformed, or larger than <paramref name="destination"/>.</exception>
    private static int DecompressCore(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        var srcIndex = 0;
        var dstIndex = 0;
        var srcLength = source.Length;
        var dstLength = destination.Length;

        while (srcIndex < srcLength)
        {
            // Token: high nibble = literal length, low nibble = match length - MinMatch.
            var token = source[srcIndex++];

            // Decode literal length
            var literalLength = token >> MatchLengthBits;
            if (literalLength == RunMask)
            {
                literalLength = ReadExtendedLength(
                    source,
                    ref srcIndex,
                    literalLength,
                    "Unexpected end of input while reading literal length.");
            }

            // Copy literals. Bounds are checked against the remaining space (subtraction) so
            // that a huge length cannot overflow the comparison.
            if (literalLength > 0)
            {
                if (literalLength > srcLength - srcIndex)
                {
                    throw new InvalidDataException("Unexpected end of input while reading literals.");
                }

                if (literalLength > dstLength - dstIndex)
                {
                    throw new InvalidDataException("Output buffer overflow while writing literals.");
                }

                source.Slice(srcIndex, literalLength).CopyTo(destination.Slice(dstIndex));
                srcIndex += literalLength;
                dstIndex += literalLength;
            }

            // Check if we're at the end (no match after last literals)
            if (srcIndex >= srcLength)
            {
                break;
            }

            // Decode offset (16-bit little-endian distance back into the output)
            if (srcLength - srcIndex < 2)
            {
                throw new InvalidDataException("Unexpected end of input while reading offset.");
            }

            var offset = source[srcIndex] | (source[srcIndex + 1] << 8);
            srcIndex += 2;
            if (offset == 0)
            {
                throw new InvalidDataException("Invalid offset: 0.");
            }

            // Decode match length
            var matchLength = (token & MatchMask) + MinMatch;
            if ((token & MatchMask) == MatchMask)
            {
                matchLength = ReadExtendedLength(
                    source,
                    ref srcIndex,
                    matchLength,
                    "Unexpected end of input while reading match length.");
            }

            // Copy match
            var matchStart = dstIndex - offset;
            if (matchStart < 0)
            {
                throw new InvalidDataException($"Invalid match offset: {offset} at position {dstIndex}.");
            }

            if (matchLength > dstLength - dstIndex)
            {
                throw new InvalidDataException("Output buffer overflow while writing match.");
            }

            // Handle overlapping copy
            CopyMatch(destination, dstIndex, matchStart, matchLength);
            dstIndex += matchLength;
        }

        return dstIndex;
    }

    /// <summary>
    /// Reads the continuation bytes of a literal or match length: each byte is added to
    /// <paramref name="length"/>, and a byte other than 255 terminates the sequence.
    /// </summary>
    /// <param name="source">Compressed block being decoded.</param>
    /// <param name="srcIndex">Read position; advanced past the consumed bytes.</param>
    /// <param name="length">Length decoded so far (from the token nibble).</param>
    /// <param name="truncatedMessage">Exception message used when the input ends mid-sequence.</param>
    /// <returns>The complete length.</returns>
    /// <exception cref="InvalidDataException">The input ends before the sequence terminates, or the length does not fit in an <see cref="int"/>.</exception>
    private static int ReadExtendedLength(ReadOnlySpan<byte> source, ref int srcIndex, int length, string truncatedMessage)
    {
        int additionalLength;
        do
        {
            if (srcIndex >= source.Length)
            {
                throw new InvalidDataException(truncatedMessage);
            }

            additionalLength = source[srcIndex++];

            // Without this guard a long run of 0xFF bytes silently wraps the length negative,
            // which would make the caller skip the copy and accept a corrupt stream.
            if (length > int.MaxValue - additionalLength)
            {
                throw new InvalidDataException("Invalid length: value exceeds the supported range.");
            }

            length += additionalLength;
        } while (additionalLength == byte.MaxValue);

        return length;
    }

    /// <summary>
    /// Copies <paramref name="matchLength"/> bytes within <paramref name="buffer"/> from
    /// <paramref name="matchStart"/> to <paramref name="dstIndex"/>. The caller guarantees both
    /// ranges lie inside the buffer and that <paramref name="matchStart"/> precedes <paramref name="dstIndex"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void CopyMatch(Span<byte> buffer, int dstIndex, int matchStart, int matchLength)
    {
        var offset = dstIndex - matchStart;

        // For non-overlapping copies, use fast path
        if (offset >= matchLength)
        {
            buffer.Slice(matchStart, matchLength).CopyTo(buffer.Slice(dstIndex));
            return;
        }

        // Overlapping copy - must copy byte by byte in ascending order, because later source
        // bytes are the ones this very loop has just written (that is how LZ4 encodes runs).
        for (var i = 0; i < matchLength; i++)
        {
            buffer[dstIndex + i] = buffer[matchStart + i];
        }
    }

    /// <summary>
    /// Reads a little-endian 32-bit signed integer from <paramref name="data"/> at <paramref name="index"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ReadInt32LittleEndian(ReadOnlySpan<byte> data, int index)
    {
        return System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(data.Slice(index, sizeof(int)));
    }
}