AyCode.Core/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.Binary...

739 lines
31 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
namespace AyCode.Core.Serializers.Binaries;
public static partial class AcBinaryDeserializer
{
internal sealed partial class BinaryDeserializationContext<TInput>
{
// Shared UTF-8 encoding configured not to emit a byte-order mark.
// Used by ReadStringUtf8Cached to decode a string on a cache miss.
private static readonly Encoding Utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
#region Buffer State — owned by context for zero virtual dispatch
// Backing array every read method indexes into. Passed by ref to Input.TryAdvanceSegment,
// which may replace it when the current segment is exhausted.
internal byte[] _buffer = null!;
// Upper bound (exclusive) that _position is checked against before a read; also passed by ref
// to Input.TryAdvanceSegment.
internal int _bufferLength;
// Index into _buffer of the next byte to be read; advanced by every read method.
internal int _position;
#endregion
// String caching state — needed for WASM optimization.
// The cache dictionary is owned by context (pooled), passed in at init time.
// When true, string reads no longer than _maxCachedStringLength bytes go through
// ReadStringUtf8Cached; cleared by DisableStringCaching.
private bool _useStringCaching;
// Largest byte length (inclusive) of a string that is still routed through the cache.
private int _maxCachedStringLength;
/// <summary>
/// Gets a value indicating whether the reader has run out of bytes: <c>false</c> while the current
/// buffer still has unread bytes, otherwise <c>true</c> unless the input can supply another segment.
/// </summary>
public bool IsAtEnd
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        // Unread bytes remain in the current buffer.
        if (_position < _bufferLength)
        {
            return false;
        }

        // TInput.IsTrustedSingleSegment is a per-specialization constant. When it is true the
        // short-circuit makes the segment-advance call unreachable, so the JIT can drop it; for
        // multi-segment / streaming inputs the call decides whether at least one more byte exists.
        return TInput.IsTrustedSingleSegment
            || !Input.TryAdvanceSegment(ref _buffer, ref _position, ref _bufferLength, 1);
    }
}
/// <summary>Gets the index in the current buffer of the next byte to be read.</summary>
public int Position
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return _position;
    }
}
#region Core Read Methods
/// <summary>Reads one byte at the current position and advances past it.</summary>
/// <returns>The byte that was at the current position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte ReadByte()
{
    // EnsureAvailable carries the trusted-single-segment guard: for ArrayBinaryInput it folds away
    // and only the array access remains; for multi-segment / streaming inputs it performs the
    // bounds check and advances to the next segment when needed.
    EnsureAvailable(1);

    var index = _position;
    _position = index + 1;
    return _buffer[index];
}
/// <summary>Returns the byte at the current position without advancing.</summary>
/// <returns>The byte at the current position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte PeekByte()
{
    // Same availability guard as ReadByte; _position is left untouched.
    EnsureAvailable(1);

    var next = _buffer[_position];
    return next;
}
/// <summary>Advances the read position by <paramref name="count"/> bytes without reading them.</summary>
/// <param name="count">Number of bytes to step over.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Skip(int count)
{
    EnsureAvailable(count);

    var target = _position + count;
    _position = target;
}
#endregion
#region Fixed-Width Reads
/// <summary>Reads a 2-byte signed integer in native byte order via an unaligned load and advances by 2.</summary>
/// <returns>The decoded <see cref="short"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public short ReadInt16Unsafe()
{
    EnsureAvailable(2);

    var start = _position;
    var result = Unsafe.ReadUnaligned<short>(ref _buffer[start]);
    _position = start + 2;
    return result;
}
/// <summary>Reads a 2-byte unsigned integer in native byte order via an unaligned load and advances by 2.</summary>
/// <returns>The decoded <see cref="ushort"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ushort ReadUInt16Unsafe()
{
    EnsureAvailable(2);

    var start = _position;
    var result = Unsafe.ReadUnaligned<ushort>(ref _buffer[start]);
    _position = start + 2;
    return result;
}
/// <summary>
/// H2Q6 helper — reads two consecutive bytes as one <c>ushort</c> with a single unaligned load in
/// native byte order (on little-endian hosts: low byte = first byte, high byte = second byte).
/// Used by the <c>StringSmall</c> / <c>StringInternFirstSmall</c> readers to fetch the packed
/// <c>charLen:8 | utf8Len:8</c> header with one availability check.
/// </summary>
/// <returns>The two bytes combined into a <see cref="ushort"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ushort ReadTwoBytesUnsafe()
{
    EnsureAvailable(2);

    ref var first = ref _buffer[_position];
    var packed = Unsafe.ReadUnaligned<ushort>(ref first);
    _position += 2;
    return packed;
}
/// <summary>
/// Reads a 4-byte unsigned integer via an unaligned load and advances by 4. The load is
/// native-endian (little-endian on Intel/AMD); the wire format is little-endian by convention, so a
/// big-endian host would additionally need <c>BinaryPrimitives.ReverseEndianness</c>.
/// Used by the <c>StringBig</c> reader for <c>charLen:32</c> / <c>utf8Len:32</c>, and by
/// <see cref="ReadAndRegisterInternedStringMedium"/> for its packed 16|16 header.
/// </summary>
/// <returns>The decoded <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint ReadUInt32Unsafe()
{
    EnsureAvailable(4);

    var start = _position;
    var result = Unsafe.ReadUnaligned<uint>(ref _buffer[start]);
    _position = start + 4;
    return result;
}
/// <summary>
/// Reads a 4-byte signed integer via an unaligned native-endian load (little-endian on Intel/AMD)
/// and advances by 4. Mirrors <c>Unsafe.WriteUnaligned&lt;int&gt;</c> on the writer side. Used by the
/// FastWire string readers (e.g. <see cref="ReadStringUtf16Markerless"/>) to fetch the int32 length header.
/// </summary>
/// <returns>The decoded <see cref="int"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int ReadInt32Unsafe()
{
    EnsureAvailable(4);

    ref var first = ref _buffer[_position];
    var result = Unsafe.ReadUnaligned<int>(ref first);
    _position += 4;
    return result;
}
/// <summary>
/// Reads an 8-byte unsigned integer via an unaligned native-endian load (little-endian on Intel/AMD)
/// and advances by 8. Used by the H2Q6 <c>StringBig</c> reader to fetch the packed
/// <c>charLen:32 | utf8Len:32</c> header in one load.
/// </summary>
/// <returns>The decoded <see cref="ulong"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ulong ReadUInt64Unsafe()
{
    EnsureAvailable(8);

    var start = _position;
    var result = Unsafe.ReadUnaligned<ulong>(ref _buffer[start]);
    _position = start + 8;
    return result;
}
/// <summary>Reads one UTF-16 code unit (2 bytes, native byte order) and advances by 2.</summary>
/// <returns>The decoded <see cref="char"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public char ReadCharUnsafe()
{
    EnsureAvailable(2);

    var codeUnit = Unsafe.ReadUnaligned<ushort>(ref _buffer[_position]);
    _position += 2;
    return (char)codeUnit;
}
/// <summary>
/// Reads 4 bytes as a raw 32-bit pattern (native byte order), reinterprets it as a
/// <see cref="float"/>, and advances by 4.
/// </summary>
/// <returns>The decoded <see cref="float"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public float ReadSingleUnsafe()
{
    EnsureAvailable(4);

    var start = _position;
    var rawBits = Unsafe.ReadUnaligned<int>(ref _buffer[start]);
    _position = start + 4;
    return BitConverter.Int32BitsToSingle(rawBits);
}
/// <summary>
/// Reads 8 bytes as a raw 64-bit pattern (native byte order), reinterprets it as a
/// <see cref="double"/>, and advances by 8.
/// </summary>
/// <returns>The decoded <see cref="double"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public double ReadDoubleUnsafe()
{
    EnsureAvailable(8);

    var start = _position;
    var rawBits = Unsafe.ReadUnaligned<long>(ref _buffer[start]);
    _position = start + 8;
    return BitConverter.Int64BitsToDouble(rawBits);
}
/// <summary>
/// Reads a 16-byte decimal laid out as four native-endian 32-bit words (<c>lo, mid, hi, flags</c>)
/// and advances by 16.
/// </summary>
/// <returns>The reconstructed <see cref="decimal"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public decimal ReadDecimalUnsafe()
{
    EnsureAvailable(16);

    var words = MemoryMarshal.Cast<byte, int>(_buffer.AsSpan(_position, 16));
    var flags = words[3];

    // Bit 31 of the flags word is the sign, so a negative int means a negative decimal.
    // The scale is taken from bits 16..22 of the flags word.
    var negative = flags < 0;
    var scale = (byte)((flags >> 16) & 0x7F);

    // DEBUG-only diagnostic; runs before the position moves so it reports the start of the value.
    LogDecimalDrift(scale);

    _position += 16;
    return new decimal(words[0], words[1], words[2], negative, scale);
}
/// <summary>
/// DEBUG-only check: throws a diagnostic <c>AcBinaryDeserializationException</c> that includes a hex
/// dump of up to 16 bytes at the current position when <paramref name="scale"/> is greater than 28.
/// Calls are removed by the compiler when <c>DEBUG</c> is not defined.
/// </summary>
/// <param name="scale">Scale extracted from the decimal flags word.</param>
[Conditional("DEBUG")]
private void LogDecimalDrift(byte scale)
{
    if (scale > 28)
    {
        var dumpLength = Math.Min(16, _bufferLength - _position);
        var hex = BitConverter.ToString(_buffer, _position, dumpLength);
        var message =
            $"[DECIMAL_DRIFT] scale={scale}, pos={_position}, bufLen={_bufferLength}, " +
            $"bufArray={_buffer.Length}, hex={hex}";
        throw new AcBinaryDeserializationException(message, _position);
    }
}
/// <summary>
/// Reads a 9-byte <see cref="DateTime"/>: 8 bytes of ticks (native byte order) followed by one
/// <see cref="DateTimeKind"/> byte. Advances by 9.
/// </summary>
/// <returns>The reconstructed <see cref="DateTime"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public DateTime ReadDateTimeUnsafe()
{
    EnsureAvailable(9);

    var start = _position;
    var tickCount = Unsafe.ReadUnaligned<long>(ref _buffer[start]);
    var kindByte = _buffer[start + 8];
    _position = start + 9;
    return new DateTime(tickCount, (DateTimeKind)kindByte);
}
/// <summary>
/// Reads a 10-byte <see cref="DateTimeOffset"/>: 8 bytes of UTC ticks followed by a 2-byte offset in
/// minutes (both native byte order). Advances by 10.
/// </summary>
/// <returns>The UTC instant re-expressed at the stored offset.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public DateTimeOffset ReadDateTimeOffsetUnsafe()
{
    EnsureAvailable(10);

    var start = _position;
    var utcTicks = Unsafe.ReadUnaligned<long>(ref _buffer[start]);
    var offsetMinutes = Unsafe.ReadUnaligned<short>(ref _buffer[start + 8]);
    _position = start + 10;

    // Build the instant as UTC first, then shift the representation to the stored offset.
    var utcInstant = new DateTimeOffset(new DateTime(utcTicks, DateTimeKind.Utc));
    var offset = TimeSpan.FromMinutes(offsetMinutes);
    return utcInstant.ToOffset(offset);
}
/// <summary>Reads an 8-byte tick count (native byte order) as a <see cref="TimeSpan"/> and advances by 8.</summary>
/// <returns>The reconstructed <see cref="TimeSpan"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TimeSpan ReadTimeSpanUnsafe()
{
    EnsureAvailable(8);

    var start = _position;
    var tickCount = Unsafe.ReadUnaligned<long>(ref _buffer[start]);
    _position = start + 8;
    return new TimeSpan(tickCount);
}
/// <summary>Reads 16 bytes as a <see cref="Guid"/> (via the span-based constructor) and advances by 16.</summary>
/// <returns>The reconstructed <see cref="Guid"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Guid ReadGuidUnsafe()
{
    EnsureAvailable(16);

    var guidBytes = _buffer.AsSpan(_position, 16);
    var result = new Guid(guidBytes);
    _position += 16;
    return result;
}
/// <summary>
/// Reads an unmanaged value of type <typeparamref name="T"/> straight from the buffer with an
/// unaligned load (native byte order / in-memory layout) and advances by <c>sizeof(T)</c>.
/// </summary>
/// <typeparam name="T">Unmanaged type to reinterpret the bytes as.</typeparam>
/// <returns>The value read.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public T ReadRaw<T>() where T : unmanaged
{
    var byteCount = Unsafe.SizeOf<T>();
    EnsureAvailable(byteCount);

    var start = _position;
    var result = Unsafe.ReadUnaligned<T>(ref _buffer[start]);
    _position = start + byteCount;
    return result;
}
/// <summary>Reads a 4-byte signed integer in native byte order (no VarInt decoding) and advances by 4.</summary>
/// <returns>The decoded <see cref="int"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int ReadInt32Raw()
{
    EnsureAvailable(4);

    var start = _position;
    var result = Unsafe.ReadUnaligned<int>(ref _buffer[start]);
    _position = start + 4;
    return result;
}
#endregion
#region VarInt Reading
/// <summary>Reads a ZigZag-encoded variable-length signed 32-bit integer.</summary>
/// <returns>The decoded signed value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int ReadVarInt()
{
    //if (FastWire) { return ReadRaw<int>(); }
    var encoded = ReadVarUInt();

    // ZigZag decode: the low bit carries the sign, the remaining bits carry the magnitude.
    var magnitude = (int)(encoded >> 1);
    var signMask = -(int)(encoded & 1);
    return magnitude ^ signMask;
}
/// <summary>
/// Reads a variable-length unsigned 32-bit integer (7 payload bits per byte, high bit = continuation).
/// One- and two-byte encodings are decoded inline; anything longer, or a value that straddles the end
/// of the current buffer, is handed to <see cref="ReadVarUIntSlow"/>.
/// </summary>
/// <returns>The decoded value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint ReadVarUInt()
{
    //if (FastWire) { return ReadRaw<uint>(); }
    // Multi-segment safety: at least one byte must be addressable before touching the buffer directly.
    // ArrayBinaryInput: TryAdvanceSegment => false (JIT eliminates this branch).
    if (_position >= _bufferLength
        && !Input.TryAdvanceSegment(ref _buffer, ref _position, ref _bufferLength, 1))
    {
        throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position);
    }

    // Fast path: single byte (0-127) - ~70% of cases
    var first = _buffer[_position];
    if (first < 0x80)
    {
        _position++;
        return first;
    }

    // Fast path: two bytes (128-16383) - ~25% of cases. Only taken when the second byte
    // lives in the same buffer window.
    if (_position + 1 < _bufferLength)
    {
        var second = _buffer[_position + 1];
        if (second < 0x80)
        {
            _position += 2;
            return (uint)(first & 0x7F) | ((uint)second << 7);
        }
    }

    // Slow path: 3+ bytes or cross-segment boundary — re-reads from the first byte via ReadByte().
    return ReadVarUIntSlow();
}
/// <summary>
/// General VarUInt decoder: reads one byte at a time through <see cref="ReadByte"/> (so it is safe
/// across segment boundaries) and accumulates 7 payload bits per byte, least-significant group first.
/// </summary>
/// <returns>The decoded value.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The encoding still has its continuation bit set after 5 bytes.
/// </exception>
private uint ReadVarUIntSlow()
{
    uint value = 0;

    // A 32-bit value needs at most 5 groups of 7 bits (shifts 0, 7, 14, 21, 28). The loop must not
    // reach shift 35: C# masks a uint shift count to its low 5 bits, so `<< 35` would act as `<< 3`
    // and silently corrupt the result instead of rejecting the malformed input.
    for (var shift = 0; shift < 35; shift += 7)
    {
        var b = ReadByte();
        value |= (uint)(b & 0x7F) << shift;
        if ((b & 0x80) == 0)
        {
            return value;
        }
    }

    // Continuation bit still set on the 5th byte: not a valid 32-bit varint.
    throw new AcBinaryDeserializationException("Invalid VarUInt encoding.", _position);
}
/// <summary>Reads a ZigZag-encoded variable-length signed 64-bit integer.</summary>
/// <returns>The decoded signed value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public long ReadVarLong()
{
    //if (FastWire) { return ReadRaw<long>(); }
    var encoded = ReadVarULong();

    // ZigZag decode: the low bit carries the sign, the remaining bits carry the magnitude.
    var magnitude = (long)(encoded >> 1);
    var signMask = -((long)encoded & 1);
    return magnitude ^ signMask;
}
/// <summary>
/// Reads a variable-length unsigned 64-bit integer (7 payload bits per byte, high bit = continuation),
/// one byte at a time through <see cref="ReadByte"/>.
/// </summary>
/// <returns>The decoded value.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The encoding still has its continuation bit set after 10 bytes.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ulong ReadVarULong()
{
    //if (FastWire) { return ReadRaw<ulong>(); }
    ulong value = 0;

    // A 64-bit value needs at most 10 groups of 7 bits (shifts 0..63). The loop must not reach
    // shift 70: C# masks a ulong shift count to its low 6 bits, so `<< 70` would act as `<< 6`
    // and silently corrupt the result instead of rejecting the malformed input.
    for (var shift = 0; shift < 70; shift += 7)
    {
        var b = ReadByte();
        value |= (ulong)(b & 0x7F) << shift;
        if ((b & 0x80) == 0)
        {
            return value;
        }
    }

    // Continuation bit still set on the 10th byte: not a valid 64-bit varint.
    throw new AcBinaryDeserializationException("Invalid VarULong encoding.", _position);
}
#endregion
/// <summary>
/// Called on the first StringInternFirst marker — turns off the short-string cache, because
/// interned strings are resolved via _internCache and plain strings then appear only once.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void DisableStringCaching() => _useStringCaching = false;
#region Bytes & String Reading
/// <summary>Copies the next <paramref name="length"/> bytes into a new array and advances past them.</summary>
/// <param name="length">Number of bytes to read.</param>
/// <returns>A new array holding the bytes; an empty array when <paramref name="length"/> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte[] ReadBytes(int length)
{
    if (length != 0)
    {
        EnsureAvailable(length);

        // Skipping zero-initialisation is safe: every element is overwritten by the copy below.
        var copy = GC.AllocateUninitializedArray<byte>(length);
        var source = _buffer.AsSpan(_position, length);
        source.CopyTo(copy);
        _position += length;
        return copy;
    }

    return [];
}
/// <summary>
/// Reads a raw UTF-16 string of <paramref name="charLength"/> chars (FastWire mode body).
/// The wire body is <c>charLength * 2</c> bytes in native byte order (LE on Intel/AMD); the bytes are
/// reinterpreted as chars via <see cref="MemoryMarshal.Cast{TFrom, TTo}(System.Span{TFrom})"/> and
/// copied into the new string without any decoding step.
/// <para>Caller MUST be on the FastWire path. <see cref="ReadStringUtf8"/> is the Compact/UTF-8
/// counterpart; this method performs no runtime mode check.</para>
/// </summary>
/// <param name="charLength">Number of UTF-16 code units to read.</param>
/// <returns>The string read; <see cref="string.Empty"/> when <paramref name="charLength"/> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf16(int charLength)
{
    if (charLength == 0)
    {
        return string.Empty;
    }

    var byteCount = charLength * 2;
    EnsureAvailable(byteCount);

    var rawBytes = _buffer.AsSpan(_position, byteCount);
    var text = new string(MemoryMarshal.Cast<byte, char>(rawBytes));
    _position += byteCount;
    return text;
}
/// <summary>
/// FastWire markerless string read driven by an int32 header: a positive value <c>N</c> is followed
/// by N×2 raw UTF-16 bytes, <c>0</c> means empty, and any negative value (sentinel <c>-1</c>) means null.
/// <para>The positive-length case is tested first because content is the common case.
/// Companion writer is <see cref="BinarySerializationContext{TOutput}.WriteStringUtf16Markerless"/>.</para>
/// </summary>
/// <returns>The string read, <see cref="string.Empty"/>, or <c>null</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string? ReadStringUtf16Markerless()
{
    var header = ReadInt32Unsafe();
    return header switch
    {
        > 0 => ReadStringUtf16(header),
        0 => string.Empty,
        _ => null, // negative header (sentinel -1)
    };
}
/// <summary>
/// Reads a UTF-8 encoded string of <paramref name="length"/> bytes (Compact wire) and advances past it.
/// Short strings go through the string cache while caching is enabled; everything else is decoded
/// by <see cref="DecodeUtf8"/>.
/// </summary>
/// <param name="length">Number of UTF-8 bytes on the wire.</param>
/// <returns>The decoded string; <see cref="string.Empty"/> when <paramref name="length"/> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf8(int length)
{
    if (length == 0)
    {
        return string.Empty;
    }

    EnsureAvailable(length);

    // WASM optimization: cache short strings to reduce allocations.
    var cacheable = _useStringCaching && length <= _maxCachedStringLength;

    // BASELINE TEMP: the ASCII fast path below is disabled so that every uncached string goes
    // through DecodeUtf8 and the decoder can be measured in isolation. Re-enable once the decoder
    // work is benchmarked and verified.
    //
    //// ASCII fast path: short strings (≤128 bytes) with all ASCII bytes
    //// use string.Create + direct byte→char widening, avoiding UTF8Encoding overhead.
    //if (length <= 128 && System.Text.Ascii.IsValid(_buffer.AsSpan(_position, length)))
    //{
    // var pos = _position;
    // _position += length;
    // return string.Create(length, (Buffer: _buffer, Start: pos), static (chars, state) =>
    // {
    // var src = state.Buffer.AsSpan(state.Start, chars.Length);
    // for (var i = 0; i < chars.Length; i++)
    // chars[i] = (char)src[i];
    // });
    //}
    return cacheable
        ? ReadStringUtf8Cached(length)
        : DecodeUtf8(length);
}
/// <summary>
/// Reads <paramref name="byteLength"/> ASCII bytes from the wire and widens them to a UTF-16
/// string. Caller MUST guarantee the payload is pure ASCII — typically by dispatching on a
/// <c>FixStrAscii</c> / <c>StringAscii</c> marker (the marker IS the ASCII-validity contract).
/// </summary>
/// <remarks>
/// No UTF-8 decoding takes place on the uncached path: the bytes are widened with
/// <see cref="Encoding.Latin1"/>.<c>GetString</c>. Latin1 maps every byte 1:1 to the char with the same
/// code point (0..255) and ASCII (0..127) is a subset, so for marker-validated ASCII the result equals
/// a manual <c>(char)b</c> widen while relying on the BCL implementation.
///
/// FastWire mode never emits ASCII markers — they are a Compact-mode-only optimization.
/// </remarks>
/// <param name="byteLength">Number of ASCII bytes (= resulting chars) to read.</param>
/// <returns>The widened string; <see cref="string.Empty"/> when <paramref name="byteLength"/> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadAsciiBytesAsString(int byteLength)
{
    if (byteLength == 0)
    {
        return string.Empty;
    }

    EnsureAvailable(byteLength);

    if (!_useStringCaching || byteLength > _maxCachedStringLength)
    {
        // Uncached: advance first, then widen from the remembered start offset.
        var start = _position;
        _position = start + byteLength;
        return Encoding.Latin1.GetString(_buffer, start, byteLength);
    }

    // Cached short-string path (WASM optimization) — full-content hash plus Ascii.Equals verification.
    return ReadStringUtf8Cached(byteLength);
}
/// <summary>
/// Decodes <paramref name="byteLength"/> UTF-8 bytes starting at the current position into a new
/// string and advances past them.
/// </summary>
/// <remarks>
/// Two passes over the bytes, no intermediate buffer:
/// 1. <see cref="Encoding.GetCharCount(ReadOnlySpan{byte})"/> on <see cref="Encoding.UTF8"/> sizes the result.
/// 2. <see cref="System.Text.Unicode.Utf8.ToUtf16"/> transcodes directly into the string's storage
///    inside the <see cref="string.Create{TState}"/> callback.
///
/// Malformed input: pass 1 counts one U+FFFD per invalid sequence, so pass 2 runs with
/// <c>replaceInvalidSequences: true</c> to produce exactly that many chars. With <c>false</c> the
/// transcoder stopped at the first invalid byte and, because its status is not inspected, the rest
/// of the string was left as <c>'\0'</c> chars. Replacing also matches
/// <see cref="ReadStringUtf8Cached"/>, which decodes through <see cref="Encoding.GetString(ReadOnlySpan{byte})"/>.
/// Well-formed input decodes identically either way.
///
/// This method performs no availability check; the caller must already have ensured that
/// <paramref name="byteLength"/> bytes are readable.
/// </remarks>
/// <param name="byteLength">Number of UTF-8 bytes to consume.</param>
/// <returns>The decoded string.</returns>
[MethodImpl(MethodImplOptions.NoInlining)] // kept out of line so the inlined ReadStringUtf8 caller stays small
private string DecodeUtf8(int byteLength)
{
    var pos = _position;
    _position += byteLength;

    var src = _buffer.AsSpan(pos, byteLength);
    var charCount = Encoding.UTF8.GetCharCount(src);

    // The state tuple carries array + offsets (a span cannot be captured by the callback);
    // the static lambda avoids a per-call closure allocation.
    return string.Create(charCount, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) =>
    {
        System.Text.Unicode.Utf8.ToUtf16(
            state.Buffer.AsSpan(state.Pos, state.Len),
            chars,
            out _,
            out _,
            replaceInvalidSequences: true);
    });
}
/// <summary>
/// H2Q6 1-pass UTF-8 string read — both <paramref name="charLength"/> and <paramref name="byteLength"/>
/// come from the wire (StringSmall/Medium/Big tier headers), so the result can be allocated at its
/// final size without the separate <see cref="Utf8Transcoder.CountUtf8Chars"/> counting pass.
/// </summary>
/// <remarks>
/// Deliberately kept as a single method: an earlier dispatcher/core split (V4N4) was reverted because
/// the AOT compiler did not inline the dispatcher despite <c>[AggressiveInlining]</c>, which only added
/// a call per decode. The <c>string.Create</c> callback is a static lambda, so its delegate is cached.
///
/// <paramref name="charLength"/> is trusted: the transcoder's status and counts are discarded, so a
/// header that does not match the payload is not detected here.
///
/// <para>Compact mode only — FastWire mode never emits H2Q6 tier markers (its strings are raw UTF-16,
/// see <see cref="ReadStringUtf16"/>).</para>
/// </remarks>
/// <param name="charLength">Number of UTF-16 code units the payload decodes to, as stated by the wire header.</param>
/// <param name="byteLength">Number of UTF-8 bytes on the wire.</param>
/// <returns>The decoded string; <see cref="string.Empty"/> when <paramref name="byteLength"/> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf8WithCharLen(int charLength, int byteLength)
{
    if (byteLength == 0)
    {
        return string.Empty;
    }

    EnsureAvailable(byteLength);

    // WASM string-cache fast path — a cached entry is byte-compared before the canonical instance is returned.
    var cacheable = _useStringCaching && byteLength <= _maxCachedStringLength;
    if (cacheable)
    {
        return ReadStringUtf8Cached(byteLength);
    }

    var start = _position;
    _position = start + byteLength;

    return string.Create(charLength, (Buffer: _buffer, Pos: start, Len: byteLength), static (destination, source) =>
    {
        var utf8 = source.Buffer.AsSpan(source.Pos, source.Len);
        System.Text.Unicode.Utf8.ToUtf16(utf8, destination, out _, out _, replaceInvalidSequences: false);
    });
}
/// <summary>
/// Reads <paramref name="length"/> bytes as a string through the hash-keyed string cache and advances
/// past them. A cached entry is reused only when its char length equals <paramref name="length"/> and
/// <c>Ascii.Equals</c> confirms it matches the bytes; otherwise the bytes are decoded with
/// <see cref="Utf8NoBom"/> and the result is stored under the hash, replacing any previous entry.
/// </summary>
/// <param name="length">Number of bytes to consume. The caller has already ensured they are available.</param>
/// <returns>The cached or freshly decoded string.</returns>
private string ReadStringUtf8Cached(int length)
{
    var bytes = _buffer.AsSpan(_position, length);
    var key = ComputeStringHashFull(bytes);

    // The hash alone is not trusted: verify content before handing out the cached instance.
    if (_stringCache!.TryGetValue(key, out var hit)
        && hit.Length == length
        && Ascii.Equals(bytes, hit))
    {
        _position += length;
        return hit;
    }

    var decoded = Utf8NoBom.GetString(bytes);
    _stringCache[key] = decoded;
    _position += length;
    return decoded;
}
/// <summary>
/// H2Q6 StringInternFirstSmall reader: wire <c>[cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]</c>
/// after the marker has been consumed. Decodes the string, registers it in the intern cache at the
/// given index, and returns it. Single source of wire-decode for this marker — shared by the runtime
/// <c>TypeReaderTable</c> dispatch, the cross-type populate path, and the SGen-emitted
/// string-property switch.
/// </summary>
/// <returns>The decoded (and now interned) string; <see cref="string.Empty"/> when <c>utf8Len</c> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal string ReadAndRegisterInternedStringSmall()
{
    // The first interning marker proves the payload uses string interning, so plain String entries
    // appear only once and _stringCache would never hit on them.
    DisableStringCaching();

    var slot = (int)ReadVarUInt();

    // One 2-byte load: low byte = charLen, high byte = utf8Len.
    var lengths = ReadTwoBytesUnsafe();
    var charCount = (byte)lengths;
    var utf8Count = (byte)(lengths >> 8);

    var text = utf8Count == 0
        ? string.Empty
        : ReadStringUtf8WithCharLen(charCount, utf8Count);

    RegisterInternedValueAt(slot, text);
    return text;
}
/// <summary>
/// H2Q6 StringInternFirstMedium reader: wire <c>[cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][bytes]</c>.
/// Decodes the string, registers it in the intern cache at the given index, and returns it. (Big tier
/// never engages on the interning path — see <see cref="BinaryTypeCode"/> H2Q6 layout comment.) Shared
/// by runtime dispatch + SGen-emit, same rationale as <see cref="ReadAndRegisterInternedStringSmall"/>.
/// </summary>
/// <returns>The decoded (and now interned) string; <see cref="string.Empty"/> when <c>utf8Len</c> is 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal string ReadAndRegisterInternedStringMedium()
{
    DisableStringCaching();

    var slot = (int)ReadVarUInt();

    // One 4-byte load: low 16 bits = charLen, high 16 bits = utf8Len.
    var lengths = ReadUInt32Unsafe();
    var charCount = (ushort)lengths;
    var utf8Count = (ushort)(lengths >> 16);

    var text = utf8Count == 0
        ? string.Empty
        : ReadStringUtf8WithCharLen(charCount, utf8Count);

    RegisterInternedValueAt(slot, text);
    return text;
}
/// <summary>
/// Content hash used as the key for string caching.
/// CRITICAL: DO NOT SIMPLIFY — prevents hash collisions for similar property names.
/// See BinaryDeserializationContext for full history.
/// </summary>
/// <remarks>
/// Inputs of up to 32 bytes are hashed over every byte. Longer inputs are hashed over their length
/// plus three 8-byte windows (start, end, and around the midpoint), so only those inputs are sampled
/// rather than fully hashed.
/// </remarks>
/// <param name="data">Raw bytes of the string as they appear on the wire.</param>
/// <returns>A 32-bit hash code produced by <see cref="HashCode"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int ComputeStringHashFull(ReadOnlySpan<byte> data)
{
// Short input: feed every byte into the hash.
if (data.Length <= 32)
{
var hash = new HashCode();
hash.AddBytes(data);
return hash.ToHashCode();
}
// Long input: mix the length with the first 8, last 8, and 8 middle bytes.
var h = new HashCode();
h.Add(data.Length);
h.AddBytes(data.Slice(0, 8));
h.AddBytes(data.Slice(data.Length - 8, 8));
h.AddBytes(data.Slice(data.Length / 2 - 4, 8));
return h.ToHashCode();
}
#endregion
/// <summary>
/// Guarantees that <paramref name="length"/> bytes can be read starting at <c>_buffer[_position]</c>,
/// asking the input for the next segment via <see cref="IBinaryInputBase.TryAdvanceSegment"/> when the
/// current buffer does not hold enough.
/// <para><b>JIT specialization fast-path</b>: when <c>TInput.IsTrustedSingleSegment</c> is the
/// constant <c>true</c> (e.g. <see cref="ArrayBinaryInput"/>), the first guard returns unconditionally
/// and the rest of the body is dead code, so both the bounds check and the segment advance disappear.
/// Trade-off: corrupt-wire detection downgrades from <see cref="AcBinaryDeserializationException"/>
/// to a generic <see cref="System.IndexOutOfRangeException"/>; acceptable for trusted byte[] inputs
/// that the caller has already validated.</para>
/// <para>For non-trusted inputs (<see cref="SequenceBinaryInput"/>, AsyncPipeReaderInputAdapter) the
/// guard is the constant <c>false</c> and is itself removed, leaving only the bounds check and
/// segment advance.</para>
/// </summary>
/// <param name="length">Number of bytes the caller is about to read.</param>
/// <exception cref="AcBinaryDeserializationException">The input cannot supply the requested bytes.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void EnsureAvailable(int length)
{
    // Folded per TInput specialization:
    //   ArrayBinaryInput     → if (true) return;  → whole body eliminated
    //   SequenceBinaryInput  → if (false) return; → guard eliminated, bounds-check kept
    //   AsyncPipeReaderInput → if (false) return; → guard eliminated, bounds-check kept
    if (TInput.IsTrustedSingleSegment)
    {
        return;
    }

    // Enough bytes already sit in the current buffer.
    if (_position <= _bufferLength - length)
    {
        return;
    }

    if (Input.TryAdvanceSegment(ref _buffer, ref _position, ref _bufferLength, length))
    {
        // DEBUG-only verification that the input honoured its promise.
        AssertGuarantee(length);
        return;
    }

    throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position);
}
/// <summary>
/// DEBUG-only check used after a successful segment advance: throws a diagnostic
/// <c>AcBinaryDeserializationException</c> when fewer than <paramref name="needed"/> bytes are
/// available between <c>_position</c> and <c>_bufferLength</c>. Calls are removed by the compiler
/// when <c>DEBUG</c> is not defined.
/// </summary>
/// <param name="needed">Number of bytes the segment advance was asked to make available.</param>
[Conditional("DEBUG")]
private void AssertGuarantee(int needed)
{
    var available = _bufferLength - _position;
    if (available >= needed)
    {
        return;
    }

    var message =
        $"[GUARANTEE_VIOLATED] TryAdvanceSegment returned true but available={available} < needed={needed}, " +
        $"pos={_position}, bufLen={_bufferLength}, bufArray={_buffer.Length}";
    throw new AcBinaryDeserializationException(message, _position);
}
}
}