Add pure managed LZ4 compression to serializers
Implemented LZ4 compression/decompression in pure managed code, compatible with all platforms including WASM. Added new helpers (`Lz4`, `Lz4Compressor`, `Lz4Decompressor`) and a `Lz4CompressionMode` enum. Updated `AcBinarySerializerOptions` to support compression, and modified all relevant serializer methods to apply LZ4 when enabled. Benchmarks and buffer handling updated to support zero-allocation compression. No native dependencies required.
This commit is contained in:
parent
3da902b575
commit
18370879ec
|
|
@ -1,13 +1,15 @@
|
|||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using AyCode.Core.Compression;
|
||||
using AyCode.Core.Serializers.Binaries;
|
||||
using AyCode.Core.Serializers.Jsons;
|
||||
using AyCode.Core.Tests.TestModels;
|
||||
using MessagePack;
|
||||
using MessagePack.Resolvers;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Newtonsoft.Json;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace AyCode.Core.Serializers.Console;
|
||||
|
||||
|
|
@ -486,6 +488,8 @@ public static class Program
|
|||
_options = options;
|
||||
Name = name;
|
||||
_serialized = AcBinarySerializer.Serialize(order, options);
|
||||
|
||||
//_options.UseCompression = Lz4CompressionMode.Block;
|
||||
}
|
||||
|
||||
public void Warmup(int iterations)
|
||||
|
|
@ -553,6 +557,7 @@ public static class Program
|
|||
_order = order;
|
||||
Name = name;
|
||||
_options = ContractlessStandardResolver.Options.WithCompression(MessagePackCompression.None);
|
||||
//_options = ContractlessStandardResolver.Options.WithCompression(MessagePackCompression.Lz4Block);
|
||||
_serialized = MessagePackSerializer.Serialize(order, _options);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,206 @@
|
|||
namespace AyCode.Core.Compression;
|
||||
|
||||
/// <summary>
|
||||
/// High-level LZ4 compression helper. Pure managed implementation that works on all platforms including WASM.
|
||||
/// </summary>
|
||||
public static class Lz4
|
||||
{
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 Block format.
|
||||
/// </summary>
|
||||
/// <param name="data">Data to compress.</param>
|
||||
/// <returns>Compressed data with 4-byte original size header.</returns>
|
||||
public static byte[] CompressBlock(byte[] data)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(data);
|
||||
return CompressBlock(data.AsSpan());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 Block format.
|
||||
/// </summary>
|
||||
/// <param name="data">Data to compress.</param>
|
||||
/// <returns>Compressed data with 4-byte original size header.</returns>
|
||||
public static byte[] CompressBlock(ReadOnlySpan<byte> data)
|
||||
{
|
||||
if (data.Length == 0)
|
||||
return [];
|
||||
|
||||
var compressed = Lz4Compressor.Compress(data);
|
||||
|
||||
// Prepend original size (4 bytes, little-endian)
|
||||
var result = new byte[4 + compressed.Length];
|
||||
WriteInt32LittleEndian(result, 0, data.Length);
|
||||
compressed.CopyTo(result.AsSpan(4));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 Block format data.
|
||||
/// </summary>
|
||||
/// <param name="compressedData">Compressed data with 4-byte original size header.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] DecompressBlock(byte[] compressedData)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(compressedData);
|
||||
return DecompressBlock(compressedData.AsSpan());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 Block format data.
|
||||
/// </summary>
|
||||
/// <param name="compressedData">Compressed data with 4-byte original size header.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] DecompressBlock(ReadOnlySpan<byte> compressedData)
|
||||
{
|
||||
if (compressedData.Length < 4)
|
||||
return [];
|
||||
|
||||
var originalSize = ReadInt32LittleEndian(compressedData, 0);
|
||||
if (originalSize <= 0)
|
||||
return [];
|
||||
|
||||
var compressed = compressedData.Slice(4);
|
||||
return Lz4Decompressor.Decompress(compressed, originalSize);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 BlockArray format (chunked compression).
|
||||
/// Better for large data and streaming scenarios.
|
||||
/// </summary>
|
||||
/// <param name="data">Data to compress.</param>
|
||||
/// <param name="chunkSize">Size of each chunk (default 64KB).</param>
|
||||
/// <returns>Compressed data with chunk headers.</returns>
|
||||
public static byte[] CompressBlockArray(byte[] data, int chunkSize = Lz4Compressor.DefaultChunkSize)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(data);
|
||||
return CompressBlockArray(data.AsSpan(), chunkSize);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 BlockArray format (chunked compression).
|
||||
/// Better for large data and streaming scenarios.
|
||||
/// </summary>
|
||||
/// <param name="data">Data to compress.</param>
|
||||
/// <param name="chunkSize">Size of each chunk (default 64KB).</param>
|
||||
/// <returns>Compressed data with chunk headers.</returns>
|
||||
public static byte[] CompressBlockArray(ReadOnlySpan<byte> data, int chunkSize = Lz4Compressor.DefaultChunkSize)
|
||||
{
|
||||
if (data.Length == 0)
|
||||
return [];
|
||||
|
||||
return Lz4Compressor.CompressBlockArray(data, chunkSize);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 BlockArray format data.
|
||||
/// </summary>
|
||||
/// <param name="compressedData">Compressed data with chunk headers.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] DecompressBlockArray(byte[] compressedData)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(compressedData);
|
||||
return DecompressBlockArray(compressedData.AsSpan());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 BlockArray format data.
|
||||
/// </summary>
|
||||
/// <param name="compressedData">Compressed data with chunk headers.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] DecompressBlockArray(ReadOnlySpan<byte> compressedData)
|
||||
{
|
||||
if (compressedData.Length < 4)
|
||||
return [];
|
||||
|
||||
return Lz4Decompressor.DecompressBlockArray(compressedData);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using the specified compression mode.
|
||||
/// </summary>
|
||||
/// <param name="data">Data to compress.</param>
|
||||
/// <param name="mode">Compression mode.</param>
|
||||
/// <returns>Compressed data.</returns>
|
||||
public static byte[] Compress(byte[] data, Lz4CompressionMode mode)
|
||||
{
|
||||
return mode switch
|
||||
{
|
||||
Lz4CompressionMode.None => data,
|
||||
Lz4CompressionMode.Block => CompressBlock(data),
|
||||
Lz4CompressionMode.BlockArray => CompressBlockArray(data),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(mode), mode, "Invalid compression mode.")
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using the specified compression mode.
|
||||
/// </summary>
|
||||
/// <param name="data">Data to compress.</param>
|
||||
/// <param name="mode">Compression mode.</param>
|
||||
/// <returns>Compressed data.</returns>
|
||||
public static byte[] Compress(ReadOnlySpan<byte> data, Lz4CompressionMode mode)
|
||||
{
|
||||
return mode switch
|
||||
{
|
||||
Lz4CompressionMode.None => data.ToArray(),
|
||||
Lz4CompressionMode.Block => CompressBlock(data),
|
||||
Lz4CompressionMode.BlockArray => CompressBlockArray(data),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(mode), mode, "Invalid compression mode.")
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses data using the specified compression mode.
|
||||
/// </summary>
|
||||
/// <param name="compressedData">Compressed data.</param>
|
||||
/// <param name="mode">Compression mode used during compression.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] Decompress(byte[] compressedData, Lz4CompressionMode mode)
|
||||
{
|
||||
return mode switch
|
||||
{
|
||||
Lz4CompressionMode.None => compressedData,
|
||||
Lz4CompressionMode.Block => DecompressBlock(compressedData),
|
||||
Lz4CompressionMode.BlockArray => DecompressBlockArray(compressedData),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(mode), mode, "Invalid compression mode.")
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses data using the specified compression mode.
|
||||
/// </summary>
|
||||
/// <param name="compressedData">Compressed data.</param>
|
||||
/// <param name="mode">Compression mode used during compression.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] Decompress(ReadOnlySpan<byte> compressedData, Lz4CompressionMode mode)
|
||||
{
|
||||
return mode switch
|
||||
{
|
||||
Lz4CompressionMode.None => compressedData.ToArray(),
|
||||
Lz4CompressionMode.Block => DecompressBlock(compressedData),
|
||||
Lz4CompressionMode.BlockArray => DecompressBlockArray(compressedData),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(mode), mode, "Invalid compression mode.")
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the maximum compressed length for a given input length.
|
||||
/// </summary>
|
||||
public static int GetMaxCompressedLength(int inputLength)
|
||||
=> Lz4Compressor.GetMaxCompressedLength(inputLength);
|
||||
|
||||
private static void WriteInt32LittleEndian(byte[] buffer, int offset, int value)
|
||||
{
|
||||
buffer[offset] = (byte)value;
|
||||
buffer[offset + 1] = (byte)(value >> 8);
|
||||
buffer[offset + 2] = (byte)(value >> 16);
|
||||
buffer[offset + 3] = (byte)(value >> 24);
|
||||
}
|
||||
|
||||
private static int ReadInt32LittleEndian(ReadOnlySpan<byte> buffer, int offset)
|
||||
{
|
||||
return buffer[offset] | (buffer[offset + 1] << 8) | (buffer[offset + 2] << 16) | (buffer[offset + 3] << 24);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
namespace AyCode.Core.Compression;
|
||||
|
||||
/// <summary>
|
||||
/// LZ4 compression mode.
|
||||
/// </summary>
|
||||
public enum Lz4CompressionMode
|
||||
{
|
||||
/// <summary>
|
||||
/// No compression.
|
||||
/// </summary>
|
||||
None = 0,
|
||||
|
||||
/// <summary>
|
||||
/// LZ4 block compression. Compresses entire payload as single block.
|
||||
/// Better compression ratio, requires full buffer in memory.
|
||||
/// </summary>
|
||||
Block = 1,
|
||||
|
||||
/// <summary>
|
||||
/// LZ4 block array compression. Compresses in 64KB chunks.
|
||||
/// Slightly worse compression ratio, but streaming-friendly and lower memory usage.
|
||||
/// </summary>
|
||||
BlockArray = 2
|
||||
}
|
||||
|
|
@ -0,0 +1,372 @@
|
|||
using System.Buffers;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace AyCode.Core.Compression;
|
||||
|
||||
/// <summary>
|
||||
/// Pure managed LZ4 compressor. Works on all platforms including WASM.
|
||||
/// Implements LZ4 block format compression.
|
||||
/// </summary>
|
||||
public static class Lz4Compressor
|
||||
{
|
||||
private const int HashLog = 12;
|
||||
private const int HashTableSize = 1 << HashLog;
|
||||
private const int MinMatch = 4;
|
||||
private const int MaxInputSize = 0x7E000000; // ~2GB
|
||||
private const int MFLimit = 12; // Minimum match finding limit from end
|
||||
private const int LastLiterals = 5; // Last literals that cannot be matched
|
||||
private const int MatchLengthBits = 4;
|
||||
private const int LiteralLengthBits = 4;
|
||||
private const int RunMask = (1 << LiteralLengthBits) - 1;
|
||||
private const int MatchMask = (1 << MatchLengthBits) - 1;
|
||||
|
||||
/// <summary>
|
||||
/// Default chunk size for BlockArray mode (64KB).
|
||||
/// </summary>
|
||||
public const int DefaultChunkSize = 64 * 1024;
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 block format.
|
||||
/// </summary>
|
||||
/// <param name="source">Source data to compress.</param>
|
||||
/// <returns>Compressed data.</returns>
|
||||
public static byte[] Compress(ReadOnlySpan<byte> source)
|
||||
{
|
||||
if (source.Length == 0)
|
||||
return [];
|
||||
|
||||
if (source.Length > MaxInputSize)
|
||||
throw new ArgumentException($"Input too large. Maximum size is {MaxInputSize} bytes.", nameof(source));
|
||||
|
||||
var maxOutputSize = GetMaxCompressedLength(source.Length);
|
||||
var output = new byte[maxOutputSize];
|
||||
var compressedLength = CompressCore(source, output);
|
||||
|
||||
if (compressedLength == output.Length)
|
||||
return output;
|
||||
|
||||
var result = new byte[compressedLength];
|
||||
output.AsSpan(0, compressedLength).CopyTo(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 block format into the destination buffer.
|
||||
/// </summary>
|
||||
/// <param name="source">Source data to compress.</param>
|
||||
/// <param name="destination">Destination buffer for compressed data.</param>
|
||||
/// <returns>Number of bytes written to destination.</returns>
|
||||
public static int Compress(ReadOnlySpan<byte> source, Span<byte> destination)
|
||||
{
|
||||
if (source.Length == 0)
|
||||
return 0;
|
||||
|
||||
if (source.Length > MaxInputSize)
|
||||
throw new ArgumentException($"Input too large. Maximum size is {MaxInputSize} bytes.", nameof(source));
|
||||
|
||||
var maxOutputSize = GetMaxCompressedLength(source.Length);
|
||||
if (destination.Length < maxOutputSize)
|
||||
throw new ArgumentException($"Destination buffer too small. Need at least {maxOutputSize} bytes.", nameof(destination));
|
||||
|
||||
return CompressCore(source, destination);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compresses data using LZ4 BlockArray format (chunked compression).
|
||||
/// </summary>
|
||||
/// <param name="source">Source data to compress.</param>
|
||||
/// <param name="chunkSize">Size of each chunk (default 64KB).</param>
|
||||
/// <returns>Compressed data with chunk headers.</returns>
|
||||
public static byte[] CompressBlockArray(ReadOnlySpan<byte> source, int chunkSize = DefaultChunkSize)
|
||||
{
|
||||
if (source.Length == 0)
|
||||
return [];
|
||||
|
||||
if (chunkSize < 1024)
|
||||
throw new ArgumentException("Chunk size must be at least 1024 bytes.", nameof(chunkSize));
|
||||
|
||||
var numChunks = (source.Length + chunkSize - 1) / chunkSize;
|
||||
var maxOutputSize = 4 + (numChunks * (8 + GetMaxCompressedLength(chunkSize))); // 4 bytes for total chunks
|
||||
|
||||
using var output = new ArrayPoolWriter(maxOutputSize);
|
||||
|
||||
// Write number of chunks
|
||||
output.WriteInt32LittleEndian(numChunks);
|
||||
|
||||
var offset = 0;
|
||||
while (offset < source.Length)
|
||||
{
|
||||
var remaining = source.Length - offset;
|
||||
var currentChunkSize = Math.Min(remaining, chunkSize);
|
||||
var chunk = source.Slice(offset, currentChunkSize);
|
||||
|
||||
var compressedChunk = Compress(chunk);
|
||||
|
||||
// Write original size
|
||||
output.WriteInt32LittleEndian(currentChunkSize);
|
||||
// Write compressed size
|
||||
output.WriteInt32LittleEndian(compressedChunk.Length);
|
||||
// Write compressed data
|
||||
output.Write(compressedChunk);
|
||||
|
||||
offset += currentChunkSize;
|
||||
}
|
||||
|
||||
return output.ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the maximum compressed length for a given input length.
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static int GetMaxCompressedLength(int inputLength)
|
||||
{
|
||||
// LZ4 worst case: input + (input / 255) + 16
|
||||
return inputLength + (inputLength / 255) + 16;
|
||||
}
|
||||
|
||||
private static int CompressCore(ReadOnlySpan<byte> source, Span<byte> destination)
|
||||
{
|
||||
var hashTable = ArrayPool<int>.Shared.Rent(HashTableSize);
|
||||
try
|
||||
{
|
||||
Array.Fill(hashTable, -1, 0, HashTableSize);
|
||||
return CompressInternal(source, destination, hashTable);
|
||||
}
|
||||
finally
|
||||
{
|
||||
ArrayPool<int>.Shared.Return(hashTable);
|
||||
}
|
||||
}
|
||||
|
||||
private static int CompressInternal(ReadOnlySpan<byte> source, Span<byte> destination, int[] hashTable)
|
||||
{
|
||||
var srcLength = source.Length;
|
||||
if (srcLength < MFLimit)
|
||||
{
|
||||
// Too small, just store as literals
|
||||
return StoreLiterals(source, destination);
|
||||
}
|
||||
|
||||
var srcIndex = 0;
|
||||
var dstIndex = 0;
|
||||
var anchor = 0;
|
||||
var srcEnd = srcLength;
|
||||
var mfLimit = srcEnd - MFLimit;
|
||||
var matchLimit = srcEnd - LastLiterals;
|
||||
|
||||
// First byte is always literal
|
||||
srcIndex++;
|
||||
|
||||
while (srcIndex < mfLimit)
|
||||
{
|
||||
// Find match
|
||||
var hash = GetHash(source, srcIndex);
|
||||
var matchIndex = hashTable[hash];
|
||||
hashTable[hash] = srcIndex;
|
||||
|
||||
// Check if we have a valid match
|
||||
if (matchIndex >= 0 &&
|
||||
srcIndex - matchIndex <= 65535 &&
|
||||
matchIndex >= anchor &&
|
||||
ReadUInt32(source, matchIndex) == ReadUInt32(source, srcIndex))
|
||||
{
|
||||
// Encode literals before match
|
||||
var literalLength = srcIndex - anchor;
|
||||
|
||||
// Calculate offset BEFORE extending the match
|
||||
var offset = srcIndex - matchIndex;
|
||||
|
||||
// Find match length (extend the match)
|
||||
var matchStart = srcIndex;
|
||||
var refIndex = matchIndex;
|
||||
|
||||
// Skip the 4 bytes we already matched
|
||||
srcIndex += MinMatch;
|
||||
refIndex += MinMatch;
|
||||
|
||||
// Extend the match
|
||||
while (srcIndex < matchLimit && source[srcIndex] == source[refIndex])
|
||||
{
|
||||
srcIndex++;
|
||||
refIndex++;
|
||||
}
|
||||
|
||||
var matchLength = srcIndex - matchStart - MinMatch;
|
||||
|
||||
// Encode token
|
||||
dstIndex = EncodeSequence(destination, dstIndex, source.Slice(anchor, literalLength), offset, matchLength);
|
||||
|
||||
anchor = srcIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
srcIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
// Encode remaining literals
|
||||
var lastLiterals = srcEnd - anchor;
|
||||
if (lastLiterals > 0)
|
||||
{
|
||||
dstIndex = EncodeLiteralsOnly(destination, dstIndex, source.Slice(anchor, lastLiterals));
|
||||
}
|
||||
|
||||
return dstIndex;
|
||||
}
|
||||
|
||||
private static int StoreLiterals(ReadOnlySpan<byte> source, Span<byte> destination)
|
||||
{
|
||||
return EncodeLiteralsOnly(destination, 0, source);
|
||||
}
|
||||
|
||||
private static int EncodeSequence(Span<byte> dst, int dstIndex, ReadOnlySpan<byte> literals, int offset, int matchLength)
|
||||
{
|
||||
var literalLength = literals.Length;
|
||||
|
||||
// Encode token
|
||||
var token = dstIndex++;
|
||||
|
||||
// Encode literal length
|
||||
if (literalLength >= RunMask)
|
||||
{
|
||||
dst[token] = (byte)(RunMask << MatchLengthBits);
|
||||
var remaining = literalLength - RunMask;
|
||||
while (remaining >= 255)
|
||||
{
|
||||
dst[dstIndex++] = 255;
|
||||
remaining -= 255;
|
||||
}
|
||||
dst[dstIndex++] = (byte)remaining;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[token] = (byte)(literalLength << MatchLengthBits);
|
||||
}
|
||||
|
||||
// Copy literals
|
||||
literals.CopyTo(dst.Slice(dstIndex));
|
||||
dstIndex += literalLength;
|
||||
|
||||
// Encode offset (little-endian)
|
||||
dst[dstIndex++] = (byte)offset;
|
||||
dst[dstIndex++] = (byte)(offset >> 8);
|
||||
|
||||
// Encode match length
|
||||
if (matchLength >= MatchMask)
|
||||
{
|
||||
dst[token] |= MatchMask;
|
||||
var remaining = matchLength - MatchMask;
|
||||
while (remaining >= 255)
|
||||
{
|
||||
dst[dstIndex++] = 255;
|
||||
remaining -= 255;
|
||||
}
|
||||
dst[dstIndex++] = (byte)remaining;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[token] |= (byte)matchLength;
|
||||
}
|
||||
|
||||
return dstIndex;
|
||||
}
|
||||
|
||||
private static int EncodeLiteralsOnly(Span<byte> dst, int dstIndex, ReadOnlySpan<byte> literals)
|
||||
{
|
||||
var literalLength = literals.Length;
|
||||
|
||||
// Encode token (no match)
|
||||
var token = dstIndex++;
|
||||
|
||||
// Encode literal length
|
||||
if (literalLength >= RunMask)
|
||||
{
|
||||
dst[token] = (byte)(RunMask << MatchLengthBits);
|
||||
var remaining = literalLength - RunMask;
|
||||
while (remaining >= 255)
|
||||
{
|
||||
dst[dstIndex++] = 255;
|
||||
remaining -= 255;
|
||||
}
|
||||
dst[dstIndex++] = (byte)remaining;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[token] = (byte)(literalLength << MatchLengthBits);
|
||||
}
|
||||
|
||||
// Copy literals
|
||||
literals.CopyTo(dst.Slice(dstIndex));
|
||||
dstIndex += literalLength;
|
||||
|
||||
return dstIndex;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static int GetHash(ReadOnlySpan<byte> data, int index)
|
||||
{
|
||||
var value = ReadUInt32(data, index);
|
||||
return (int)((value * 2654435761u) >> (32 - HashLog));
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static uint ReadUInt32(ReadOnlySpan<byte> data, int index)
|
||||
{
|
||||
return (uint)(data[index] | (data[index + 1] << 8) | (data[index + 2] << 16) | (data[index + 3] << 24));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Helper class for writing to a pooled array.
|
||||
/// </summary>
|
||||
private sealed class ArrayPoolWriter : IDisposable
|
||||
{
|
||||
private byte[] _buffer;
|
||||
private int _position;
|
||||
|
||||
public ArrayPoolWriter(int initialCapacity)
|
||||
{
|
||||
_buffer = ArrayPool<byte>.Shared.Rent(initialCapacity);
|
||||
_position = 0;
|
||||
}
|
||||
|
||||
public void WriteInt32LittleEndian(int value)
|
||||
{
|
||||
EnsureCapacity(4);
|
||||
_buffer[_position++] = (byte)value;
|
||||
_buffer[_position++] = (byte)(value >> 8);
|
||||
_buffer[_position++] = (byte)(value >> 16);
|
||||
_buffer[_position++] = (byte)(value >> 24);
|
||||
}
|
||||
|
||||
public void Write(ReadOnlySpan<byte> data)
|
||||
{
|
||||
EnsureCapacity(data.Length);
|
||||
data.CopyTo(_buffer.AsSpan(_position));
|
||||
_position += data.Length;
|
||||
}
|
||||
|
||||
private void EnsureCapacity(int additionalBytes)
|
||||
{
|
||||
if (_position + additionalBytes > _buffer.Length)
|
||||
{
|
||||
var newBuffer = ArrayPool<byte>.Shared.Rent(_buffer.Length * 2);
|
||||
_buffer.AsSpan(0, _position).CopyTo(newBuffer);
|
||||
ArrayPool<byte>.Shared.Return(_buffer);
|
||||
_buffer = newBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] ToArray()
|
||||
{
|
||||
var result = new byte[_position];
|
||||
_buffer.AsSpan(0, _position).CopyTo(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
ArrayPool<byte>.Shared.Return(_buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,245 @@
|
|||
using System.Buffers;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace AyCode.Core.Compression;
|
||||
|
||||
/// <summary>
|
||||
/// Pure managed LZ4 decompressor. Works on all platforms including WASM.
|
||||
/// Implements LZ4 block format decompression.
|
||||
/// </summary>
|
||||
public static class Lz4Decompressor
|
||||
{
|
||||
private const int MinMatch = 4;
|
||||
private const int MatchLengthBits = 4;
|
||||
private const int LiteralLengthBits = 4;
|
||||
private const int RunMask = (1 << LiteralLengthBits) - 1;
|
||||
private const int MatchMask = (1 << MatchLengthBits) - 1;
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 block format data.
|
||||
/// </summary>
|
||||
/// <param name="source">Compressed data.</param>
|
||||
/// <param name="originalSize">Expected size of decompressed data.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] Decompress(ReadOnlySpan<byte> source, int originalSize)
|
||||
{
|
||||
if (source.Length == 0)
|
||||
return [];
|
||||
|
||||
if (originalSize <= 0)
|
||||
throw new ArgumentException("Original size must be positive.", nameof(originalSize));
|
||||
|
||||
var output = new byte[originalSize];
|
||||
var decompressedLength = DecompressCore(source, output);
|
||||
|
||||
if (decompressedLength != originalSize)
|
||||
throw new InvalidDataException($"Decompressed size mismatch. Expected {originalSize}, got {decompressedLength}.");
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 block format data into destination buffer.
|
||||
/// </summary>
|
||||
/// <param name="source">Compressed data.</param>
|
||||
/// <param name="destination">Destination buffer for decompressed data.</param>
|
||||
/// <returns>Number of bytes written to destination.</returns>
|
||||
public static int Decompress(ReadOnlySpan<byte> source, Span<byte> destination)
|
||||
{
|
||||
if (source.Length == 0)
|
||||
return 0;
|
||||
|
||||
return DecompressCore(source, destination);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Decompresses LZ4 BlockArray format data (chunked compression).
|
||||
/// </summary>
|
||||
/// <param name="source">Compressed data with chunk headers.</param>
|
||||
/// <returns>Decompressed data.</returns>
|
||||
public static byte[] DecompressBlockArray(ReadOnlySpan<byte> source)
|
||||
{
|
||||
if (source.Length < 4)
|
||||
return [];
|
||||
|
||||
var srcIndex = 0;
|
||||
|
||||
// Read number of chunks
|
||||
var numChunks = ReadInt32LittleEndian(source, srcIndex);
|
||||
srcIndex += 4;
|
||||
|
||||
if (numChunks <= 0)
|
||||
return [];
|
||||
|
||||
// Calculate total output size
|
||||
var totalOriginalSize = 0;
|
||||
var tempIndex = srcIndex;
|
||||
for (var i = 0; i < numChunks; i++)
|
||||
{
|
||||
if (tempIndex + 8 > source.Length)
|
||||
throw new InvalidDataException("Invalid BlockArray format: truncated header.");
|
||||
|
||||
var originalChunkSize = ReadInt32LittleEndian(source, tempIndex);
|
||||
var compressedChunkSize = ReadInt32LittleEndian(source, tempIndex + 4);
|
||||
totalOriginalSize += originalChunkSize;
|
||||
tempIndex += 8 + compressedChunkSize;
|
||||
}
|
||||
|
||||
var output = new byte[totalOriginalSize];
|
||||
var outputIndex = 0;
|
||||
|
||||
// Decompress each chunk
|
||||
for (var i = 0; i < numChunks; i++)
|
||||
{
|
||||
var originalChunkSize = ReadInt32LittleEndian(source, srcIndex);
|
||||
srcIndex += 4;
|
||||
|
||||
var compressedChunkSize = ReadInt32LittleEndian(source, srcIndex);
|
||||
srcIndex += 4;
|
||||
|
||||
var compressedChunk = source.Slice(srcIndex, compressedChunkSize);
|
||||
var decompressedLength = DecompressCore(compressedChunk, output.AsSpan(outputIndex, originalChunkSize));
|
||||
|
||||
if (decompressedLength != originalChunkSize)
|
||||
throw new InvalidDataException($"Chunk decompression failed. Expected {originalChunkSize}, got {decompressedLength}.");
|
||||
|
||||
outputIndex += originalChunkSize;
|
||||
srcIndex += compressedChunkSize;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Tries to decompress LZ4 block format data.
|
||||
/// </summary>
|
||||
/// <param name="source">Compressed data.</param>
|
||||
/// <param name="destination">Destination buffer for decompressed data.</param>
|
||||
/// <param name="bytesWritten">Number of bytes written to destination.</param>
|
||||
/// <returns>True if decompression succeeded, false otherwise.</returns>
|
||||
public static bool TryDecompress(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesWritten)
|
||||
{
|
||||
bytesWritten = 0;
|
||||
|
||||
if (source.Length == 0)
|
||||
return true;
|
||||
|
||||
try
|
||||
{
|
||||
bytesWritten = DecompressCore(source, destination);
|
||||
return true;
|
||||
}
|
||||
catch
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static int DecompressCore(ReadOnlySpan<byte> source, Span<byte> destination)
|
||||
{
|
||||
var srcIndex = 0;
|
||||
var dstIndex = 0;
|
||||
var srcLength = source.Length;
|
||||
var dstLength = destination.Length;
|
||||
|
||||
while (srcIndex < srcLength)
|
||||
{
|
||||
// Read token
|
||||
var token = source[srcIndex++];
|
||||
|
||||
// Decode literal length
|
||||
var literalLength = token >> MatchLengthBits;
|
||||
if (literalLength == RunMask)
|
||||
{
|
||||
int additionalLength;
|
||||
do
|
||||
{
|
||||
if (srcIndex >= srcLength)
|
||||
throw new InvalidDataException("Unexpected end of input while reading literal length.");
|
||||
additionalLength = source[srcIndex++];
|
||||
literalLength += additionalLength;
|
||||
} while (additionalLength == 255);
|
||||
}
|
||||
|
||||
// Copy literals
|
||||
if (literalLength > 0)
|
||||
{
|
||||
if (srcIndex + literalLength > srcLength)
|
||||
throw new InvalidDataException("Unexpected end of input while reading literals.");
|
||||
if (dstIndex + literalLength > dstLength)
|
||||
throw new InvalidDataException("Output buffer overflow while writing literals.");
|
||||
|
||||
source.Slice(srcIndex, literalLength).CopyTo(destination.Slice(dstIndex));
|
||||
srcIndex += literalLength;
|
||||
dstIndex += literalLength;
|
||||
}
|
||||
|
||||
// Check if we're at the end (no match after last literals)
|
||||
if (srcIndex >= srcLength)
|
||||
break;
|
||||
|
||||
// Decode offset
|
||||
if (srcIndex + 2 > srcLength)
|
||||
throw new InvalidDataException("Unexpected end of input while reading offset.");
|
||||
|
||||
var offset = source[srcIndex] | (source[srcIndex + 1] << 8);
|
||||
srcIndex += 2;
|
||||
|
||||
if (offset == 0)
|
||||
throw new InvalidDataException("Invalid offset: 0.");
|
||||
|
||||
// Decode match length
|
||||
var matchLength = (token & MatchMask) + MinMatch;
|
||||
if ((token & MatchMask) == MatchMask)
|
||||
{
|
||||
int additionalLength;
|
||||
do
|
||||
{
|
||||
if (srcIndex >= srcLength)
|
||||
throw new InvalidDataException("Unexpected end of input while reading match length.");
|
||||
additionalLength = source[srcIndex++];
|
||||
matchLength += additionalLength;
|
||||
} while (additionalLength == 255);
|
||||
}
|
||||
|
||||
// Copy match
|
||||
var matchStart = dstIndex - offset;
|
||||
if (matchStart < 0)
|
||||
throw new InvalidDataException($"Invalid match offset: {offset} at position {dstIndex}.");
|
||||
if (dstIndex + matchLength > dstLength)
|
||||
throw new InvalidDataException("Output buffer overflow while writing match.");
|
||||
|
||||
// Handle overlapping copy
|
||||
CopyMatch(destination, dstIndex, matchStart, matchLength);
|
||||
dstIndex += matchLength;
|
||||
}
|
||||
|
||||
return dstIndex;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void CopyMatch(Span<byte> buffer, int dstIndex, int matchStart, int matchLength)
|
||||
{
|
||||
var offset = dstIndex - matchStart;
|
||||
|
||||
// For non-overlapping copies, use fast path
|
||||
if (offset >= matchLength)
|
||||
{
|
||||
buffer.Slice(matchStart, matchLength).CopyTo(buffer.Slice(dstIndex));
|
||||
return;
|
||||
}
|
||||
|
||||
// Overlapping copy - must copy byte by byte
|
||||
// This handles the case where we're copying from recently written data
|
||||
for (var i = 0; i < matchLength; i++)
|
||||
{
|
||||
buffer[dstIndex + i] = buffer[matchStart + i];
|
||||
}
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static int ReadInt32LittleEndian(ReadOnlySpan<byte> data, int index)
|
||||
{
|
||||
return data[index] | (data[index + 1] << 8) | (data[index + 2] << 16) | (data[index + 3] << 24);
|
||||
}
|
||||
}
|
||||
|
|
@ -464,6 +464,13 @@ public static partial class AcBinarySerializer
|
|||
|
||||
#region Output
|
||||
|
||||
/// <summary>
|
||||
/// Returns the serialized data as a ReadOnlySpan without allocation.
|
||||
/// Use this for compression or other processing before final ToArray().
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public ReadOnlySpan<byte> AsSpan() => _buffer.AsSpan(0, _position);
|
||||
|
||||
public byte[] ToArray()
|
||||
{
|
||||
var result = GC.AllocateUninitializedArray<byte>(_position);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
using AyCode.Core.Helpers;
|
||||
using AyCode.Core.Compression;
|
||||
using AyCode.Core.Helpers;
|
||||
using AyCode.Core.Serializers.Expressions;
|
||||
using System.Buffers;
|
||||
using System.Collections;
|
||||
|
|
@ -248,6 +249,13 @@ public static partial class AcBinarySerializer
|
|||
var context = SerializeCore(actualValue, runtimeType, options);
|
||||
try
|
||||
{
|
||||
// Apply compression if enabled - compress directly from buffer span (1 allocation)
|
||||
if (options.UseCompression != Lz4CompressionMode.None)
|
||||
{
|
||||
return Lz4.Compress(context.AsSpan(), options.UseCompression);
|
||||
}
|
||||
|
||||
// No compression - single allocation for result
|
||||
return context.ToArray();
|
||||
}
|
||||
finally
|
||||
|
|
@ -260,6 +268,7 @@ public static partial class AcBinarySerializer
|
|||
/// <summary>
|
||||
/// Serialize object to an IBufferWriter for zero-copy scenarios.
|
||||
/// This avoids the final ToArray() allocation by writing directly to the caller's buffer.
|
||||
/// Note: Compression is applied if enabled in options.
|
||||
/// </summary>
|
||||
public static void Serialize<T>(T value, IBufferWriter<byte> writer, AcBinarySerializerOptions options)
|
||||
{
|
||||
|
|
@ -290,7 +299,18 @@ public static partial class AcBinarySerializer
|
|||
var context = SerializeCore(actualValue, runtimeType, options);
|
||||
try
|
||||
{
|
||||
context.WriteTo(writer);
|
||||
// Apply compression if enabled - compress directly from buffer span (1 allocation)
|
||||
if (options.UseCompression != Lz4CompressionMode.None)
|
||||
{
|
||||
var compressed = Lz4.Compress(context.AsSpan(), options.UseCompression);
|
||||
var destSpan = writer.GetSpan(compressed.Length);
|
||||
compressed.CopyTo(destSpan);
|
||||
writer.Advance(compressed.Length);
|
||||
}
|
||||
else
|
||||
{
|
||||
context.WriteTo(writer);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
|
@ -323,6 +343,7 @@ public static partial class AcBinarySerializer
|
|||
/// <summary>
|
||||
/// Serialize object and keep the pooled buffer for zero-copy consumers.
|
||||
/// Caller must dispose the returned result to release the buffer.
|
||||
/// Note: Compression is applied if enabled in options, result will be immutable (not pooled).
|
||||
/// </summary>
|
||||
public static BinarySerializationResult SerializeToPooledBuffer<T>(T value, AcBinarySerializerOptions options)
|
||||
{
|
||||
|
|
@ -335,6 +356,13 @@ public static partial class AcBinarySerializer
|
|||
var context = SerializeCore(value, runtimeType, options);
|
||||
try
|
||||
{
|
||||
// If compression enabled, compress directly from buffer span (1 allocation)
|
||||
if (options.UseCompression != Lz4CompressionMode.None)
|
||||
{
|
||||
var compressed = Lz4.Compress(context.AsSpan(), options.UseCompression);
|
||||
return BinarySerializationResult.FromImmutable(compressed);
|
||||
}
|
||||
|
||||
return context.DetachResult();
|
||||
}
|
||||
finally
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
using System.Runtime.CompilerServices;
|
||||
using AyCode.Core.Compression;
|
||||
|
||||
namespace AyCode.Core.Serializers.Binaries;
|
||||
|
||||
|
|
@ -135,6 +136,16 @@ public sealed class AcBinarySerializerOptions : AcSerializerOptions
|
|||
/// </summary>
|
||||
public bool RemoveOrphanedItems { get; init; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// Controls LZ4 compression for serialized data.
|
||||
/// None: No compression (default, fastest).
|
||||
/// Block: Compresses entire payload as single block (better compression ratio).
|
||||
/// BlockArray: Compresses in 64KB chunks (streaming-friendly, lower memory).
|
||||
/// Note: Both modes are WASM-compatible (pure managed implementation).
|
||||
/// Default: None
|
||||
/// </summary>
|
||||
public Lz4CompressionMode UseCompression { get; set; } = Lz4CompressionMode.None;
|
||||
|
||||
/// <summary>
|
||||
/// Creates options with specified max depth.
|
||||
/// </summary>
|
||||
|
|
|
|||
Loading…
Reference in New Issue