diff --git a/.claude/settings.local.json b/.claude/settings.local.json index a804aa4..88779da 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -83,7 +83,9 @@ "Bash(dotnet test *)", "Read(//tmp/**)", "Bash(git -C H:/Applications/Aycode/Source/AyCode.Core log -p --all -S \"MaxDepth\" -- AyCode.Core/Serializers/Binaries/AcBinarySerializer.ScanPass.cs)", - "Bash(git -C H:/Applications/Aycode/Source/AyCode.Core show ac6e66f^:AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs)" + "Bash(git -C H:/Applications/Aycode/Source/AyCode.Core show ac6e66f^:AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs)", + "Bash(ls -la \"H:\\\\Applications\\\\Aycode\\\\Source\\\\\" 2>&1 && echo \"---\" && ls -la \"H:\\\\Applications\\\\Aycode\\\\\" 2>&1)", + "Bash(dotnet publish *)" ] } } diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs index 127c897..1cb0719 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs @@ -2008,32 +2008,14 @@ public class AcBinarySourceGenerator : IIncrementalGenerator sb.AppendLine($"{i} {a} = salen == 0 ? string.Empty : context.ReadAsciiBytesAsString(salen);"); sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} }}"); - // H2Q6 interning — Small tier + // H2Q6 interning — Small / Medium tiers. Wire-decode body is shared with the runtime path + // (TypeReaderTable + cross-type populate) — see context.ReadAndRegisterInternedStringSmall/Medium. 
sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstSmall:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} context.DisableStringCaching();"); - sb.AppendLine($"{i} var iscIdx = (int)context.ReadVarUInt();"); - sb.AppendLine($"{i} var ishdr = context.ReadTwoBytesUnsafe();"); - sb.AppendLine($"{i} var ischarLen = (byte)ishdr;"); - sb.AppendLine($"{i} var isbyteLen = (byte)(ishdr >> 8);"); - sb.AppendLine($"{i} var isv = isbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(ischarLen, isbyteLen);"); - sb.AppendLine($"{i} context.RegisterInternedValueAt(iscIdx, isv);"); - sb.AppendLine($"{i} {a} = isv;"); + sb.AppendLine($"{i} {a} = context.ReadAndRegisterInternedStringSmall();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 interning — Medium tier — single uint header read sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstMedium:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} context.DisableStringCaching();"); - sb.AppendLine($"{i} var imcIdx = (int)context.ReadVarUInt();"); - sb.AppendLine($"{i} var impacked = context.ReadUInt32Unsafe();"); - sb.AppendLine($"{i} var imcharLen = (ushort)impacked;"); - sb.AppendLine($"{i} var imbyteLen = (ushort)(impacked >> 16);"); - sb.AppendLine($"{i} var imv = imbyteLen == 0 ? 
string.Empty : context.ReadStringUtf8WithCharLen(imcharLen, imbyteLen);"); - sb.AppendLine($"{i} context.RegisterInternedValueAt(imcIdx, imv);"); - sb.AppendLine($"{i} {a} = imv;"); + sb.AppendLine($"{i} {a} = context.ReadAndRegisterInternedStringMedium();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); sb.AppendLine($"{i} case BinaryTypeCode.Null:"); sb.AppendLine($"{i} {a} = null;"); sb.AppendLine($"{i} break;"); diff --git a/AyCode.Core/Serializers/AcSerializerContextBase.cs b/AyCode.Core/Serializers/AcSerializerContextBase.cs index 2f5d7c4..7b295c9 100644 --- a/AyCode.Core/Serializers/AcSerializerContextBase.cs +++ b/AyCode.Core/Serializers/AcSerializerContextBase.cs @@ -98,8 +98,7 @@ public abstract class AcSerializerContextBase /// The wrapper contains metadata (from GlobalMetadataCache) + per-context tracking state. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public TypeMetadataWrapper GetWrapper( - [DynamicallyAccessedMembers(TypeMetadataBase.RequiredMembers)] Type type) + public TypeMetadataWrapper GetWrapper([DynamicallyAccessedMembers(TypeMetadataBase.RequiredMembers)] Type type) { if (_wrappers.TryGetValue(type, out var wrapper)) return wrapper; @@ -108,8 +107,7 @@ public abstract class AcSerializerContextBase } [MethodImpl(MethodImplOptions.NoInlining)] - private TypeMetadataWrapper GetWrapperSlow( - [DynamicallyAccessedMembers(TypeMetadataBase.RequiredMembers)] Type type) + private TypeMetadataWrapper GetWrapperSlow([DynamicallyAccessedMembers(TypeMetadataBase.RequiredMembers)] Type type) { // Get metadata from global cache (thread-safe). 
// ConcurrentDictionary.GetOrAdd's Func overload drops DAMs at the delegate diff --git a/AyCode.Core/Serializers/Attributes/AcBinarySerializableAttribute.cs b/AyCode.Core/Serializers/Attributes/AcBinarySerializableAttribute.cs index fe30016..24e94d8 100644 --- a/AyCode.Core/Serializers/Attributes/AcBinarySerializableAttribute.cs +++ b/AyCode.Core/Serializers/Attributes/AcBinarySerializableAttribute.cs @@ -27,9 +27,8 @@ namespace AyCode.Core.Serializers.Attributes; /// /// /// Set a flag to false only when you can guarantee no consumer will ever need that -/// feature on this type (typical for high-throughput message DTOs where wire size and serialize -/// CPU dominate, and a feature like PropertyFilter is genuinely never used). Otherwise leave -/// it at the default true. +/// feature on this type AND the marginal hot-path speedup matters for the workload. Otherwise +/// leave it at the default true. /// [AttributeUsage(AttributeTargets.Class | AttributeTargets.Struct, Inherited = false, AllowMultiple = false)] public sealed class AcBinarySerializableAttribute : Attribute diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index d120a31..93752a3 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -619,6 +619,57 @@ public static partial class AcBinaryDeserializer return value; } + /// + /// H2Q6 StringInternFirstSmall reader: wire [cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] + /// after the marker has been consumed. Registers the decoded string in the intern cache and returns it. + /// Single source of wire-decode for this marker — shared by the runtime TypeReaderTable + /// dispatch, the cross-type populate path, and the SGen-emitted string-property switch. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadAndRegisterInternedStringSmall() + { + // First interning marker proves payload uses string interning → plain String entries + // appear only once, so _stringCache would never hit on them. + DisableStringCaching(); + var cacheIndex = (int)ReadVarUInt(); + var header = ReadTwoBytesUnsafe(); + var charLength = (byte)header; + var byteLength = (byte)(header >> 8); + if (byteLength == 0) + { + RegisterInternedValueAt(cacheIndex, string.Empty); + return string.Empty; + } + var str = ReadStringUtf8WithCharLen(charLength, byteLength); + RegisterInternedValueAt(cacheIndex, str); + return str; + } + + /// + /// H2Q6 StringInternFirstMedium reader: wire [cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][bytes]. + /// Registers the decoded string in the intern cache and returns it. (Big tier never engages on the + /// interning path — see H2Q6 layout comment.) Shared by runtime + /// dispatch + SGen-emit (same rationale as ). + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadAndRegisterInternedStringMedium() + { + DisableStringCaching(); + var cacheIndex = (int)ReadVarUInt(); + // Pack charLen:16 | utf8Len:16 read in a single uint load + var packed = ReadUInt32Unsafe(); + var charLength = (ushort)packed; + var byteLength = (ushort)(packed >> 16); + if (byteLength == 0) + { + RegisterInternedValueAt(cacheIndex, string.Empty); + return string.Empty; + } + var str = ReadStringUtf8WithCharLen(charLength, byteLength); + RegisterInternedValueAt(cacheIndex, str); + return str; + } + /// /// Full-content hash for string caching. /// CRITICAL: DO NOT SIMPLIFY � prevents hash collisions for similar property names. 
diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index 5a31493..501f86a 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -105,8 +105,8 @@ public static partial class AcBinaryDeserializer readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt()); readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty; // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) - readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ReadAndRegisterInternedStringSmall(ctx); - readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ReadAndRegisterInternedStringMedium(ctx); + readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); + readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ReadPlainStringAscii(ctx); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); @@ -1067,10 +1067,10 @@ public static partial class AcBinaryDeserializer propInfo.SetValue(target, context.GetInternedString((int)context.ReadVarUInt())); return true; case BinaryTypeCode.StringInternFirstSmall: - propInfo.SetValue(target, ReadAndRegisterInternedStringSmall(context)); + propInfo.SetValue(target, context.ReadAndRegisterInternedStringSmall()); return true; case BinaryTypeCode.StringInternFirstMedium: - propInfo.SetValue(target, ReadAndRegisterInternedStringMedium(context)); + propInfo.SetValue(target, context.ReadAndRegisterInternedStringMedium()); return true; } break; @@ -1239,55 +1239,10 @@ public static partial class AcBinaryDeserializer 
return context.ReadAsciiBytesAsString(length); } - /// - /// H2Q6 StringInternFirstSmall reader: wire [cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] - /// after the marker has been consumed. Registers the decoded string in the intern cache. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadAndRegisterInternedStringSmall(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - // First interning marker proves payload uses string interning → plain String entries - // appear only once, so _stringCache would never hit on them. - context.DisableStringCaching(); - var cacheIndex = (int)context.ReadVarUInt(); - var header = context.ReadTwoBytesUnsafe(); - var charLength = (byte)header; - var byteLength = (byte)(header >> 8); - if (byteLength == 0) - { - context.RegisterInternedValueAt(cacheIndex, string.Empty); - return string.Empty; - } - var str = context.ReadStringUtf8WithCharLen(charLength, byteLength); - context.RegisterInternedValueAt(cacheIndex, str); - return str; - } - - /// - /// H2Q6 StringInternFirstMedium reader: wire [cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][bytes]. - /// Registers the decoded string in the intern cache. (Big tier never engages on the interning path — - /// see H2Q6 layout comment.) 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadAndRegisterInternedStringMedium(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - context.DisableStringCaching(); - var cacheIndex = (int)context.ReadVarUInt(); - // Pack charLen:16 | utf8Len:16 read in a single uint load - var packed = context.ReadUInt32Unsafe(); - var charLength = (ushort)packed; - var byteLength = (ushort)(packed >> 16); - if (byteLength == 0) - { - context.RegisterInternedValueAt(cacheIndex, string.Empty); - return string.Empty; - } - var str = context.ReadStringUtf8WithCharLen(charLength, byteLength); - context.RegisterInternedValueAt(cacheIndex, str); - return str; - } + // ReadAndRegisterInternedStringSmall / Medium moved to BinaryDeserializationContext as instance + // methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty + // cross-type path, and the SGen-emitted string-property switch. See + // `BinaryDeserializationContext.Read.cs` for the implementations. ///// ///// Read a string and register it in the intern table for future references. diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md index 8f2aec1..6408c79 100644 --- a/AyCode.Core/docs/BINARY/BINARY_TODO.md +++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md @@ -21,6 +21,86 @@ For each performance TODO, validate on representative workload mixes (ASCII-heav - Minimize pool/clear overhead by avoiding unnecessary aggressive array clearing in hot lifecycle paths. - Add early scan-pass short-circuit when options guarantee no ref/intern benefit. 
+## ACCORE-BIN-T-K9M3: Hoist wire codec primitives to context instance methods (ser + deser, feature-aware SGen emit) +**Priority:** P2 · **Type:** Refactor + Performance · **Related:** `ACCORE-BIN-T-P6M4` (hotpath guardrails), `BINARY_ISSUES.md#accore-bin-i-t7k3` (polymorph compile-time guard) + +### Motivation + +Wire codec logic is currently triplicated: + +1. **SGen-emit** inlines marker decode/encode at every property emit site (`StringInternFirstSmall`, `Object`/`ObjectRefFirst`/`Null`/`ObjectRef`/FixObj-slot dispatch, etc.). +2. **Runtime `TypeReaderTable`** dispatches via `static (ctx, _) => ReadXxx(ctx)` lambdas to per-marker `static` helpers in `AcBinaryDeserializer`. +3. **Cross-type populate** (`PopulateProperty` fallback) repeats the same per-marker switch. + +Result: bug-fix risk (three copies drift), ad-hoc divergence (the polymorph `ObjectWithTypeName` emit was missing on the SGen side for months — ACCORE-BIN-I-T7K3), larger generated assemblies, longer JIT time. A single instance method on the context is the natural single-source-of-truth for each wire primitive. + +### Pilot landed + +`ReadAndRegisterInternedStringSmall` / `Medium` moved from `static` helpers on `AcBinaryDeserializer` to `internal` instance methods on `BinaryDeserializationContext`. All three call paths (TypeReaderTable lambdas, cross-type `PopulateProperty` switch, SGen-emit `EmitReadProp` case-body) now call `context.ReadAndRegister...()`. Generated case-body shrank from 12 lines to 3 per case — no perf regression (`[AggressiveInlining]` keeps the JIT/AOT inline footprint identical). + +### Scope — both ser and deser + +#### Phase A — Decode primitives (deser context) + +- `ReadStringSmall` / `Medium` / `Big` (H2Q6 non-ASCII tiers). +- `ReadPlainStringAscii` (long ASCII tier). +- `ReadObject` family — careful: this branches on `targetType` and on the writer's runtime polymorphic slot table, both of which are call-site-context-specific. 
May not be a clean hoist; see "Caveat" below. + +#### Phase B — Encode primitives (ser context) + +- `WriteStringWithDispatch`, `WriteStringInternFirstWithDispatch` — already partly on the context, audit completeness. +- Marker-write helpers (`WriteObjectFullMarker*`) — already on the context post-T7K3. +- Audit: scan ser-side SGen-emit for any inline encode duplication that should move to the context. + +#### Phase C — Feature-conditional SGen-emit + +`EmitReadProp` (and the symmetric emit paths) must consult the per-type `Enable*Feature` flags to **omit** case-branches for disabled features. Today the SGen reader handles every marker regardless of the type's feature opt-outs — wasteful, and worse, it silently accepts markers the writer would never emit (instead of fail-fast): + +| Disabled feature | Cases to skip in SGen reader emit | +|---|---| +| `EnableInternStringFeature = false` | `StringInterned`, `StringInternFirstSmall`, `StringInternFirstMedium` | +| `EnableRefHandlingFeature = false` | `ObjectRef`, `ObjectRefFirst`, `ObjectWithMetadataRefFirst` | +| `EnableMetadataFeature = false` | `ObjectWithMetadata`, `ObjectWithMetadataRefFirst` | +| `EnablePolymorphDetectFeature = false` | Already guarded by ACBIN002 (compile error if any `object` property remains on the type) — symmetric here. | + +After Phase C: leaner generated code per opt-out type AND wire-misuse (e.g. mixed writer/reader feature configurations) surfaces as **explicit fail-fast** in the `default` switch arm — same philosophy as ACBIN002. + +### Perf guardrails (NON-NEGOTIABLE) + +The hoisting MUST NOT regress SGen hot-path performance. The pilot iteration was a net positive (less IL → faster cold-start JIT, smaller native code, identical inline body); this property has to hold for every subsequent hoist. + +Rules of thumb: + +- Every hoisted method MUST have `[MethodImpl(MethodImplOptions.AggressiveInlining)]`. 
+- Body must stay small (≤ ~30 IL instructions after compile) so the JIT/AOT actually inlines — verify via `dotnet jit-dasm` spot-check on representative callers. +- Single-purpose; no `if`-branches across distinct call-site contexts (those stay inline at the call site where the context-specific constants are visible). +- Benchmark verification before/after each hoist (`Console.FullBenchmark`). + +### JIT / NativeAOT outlook + +Modern .NET JIT (≥7) and NativeAOT both honour `AggressiveInlining` for small bodies → the hoisted methods inline back into the caller at compile time → **identical native code** to the previous inline-emit. The IL is smaller (less SGen-emit per file), which gives: + +- Faster cold-start JIT (less IL to translate on first call per type). +- Smaller assemblies on disk (NativeAOT publish size shrinks). +- Smaller i-cache footprint per active hot type (since SGen-emit no longer balloons per property). + +The generic `<TInput>` specialization remains: each `ArrayBinaryInput` / `SequenceBinaryInput` / `AsyncPipeReaderInput` still gets its own native body (`TInput.IsTrustedSingleSegment` constant-folds per specialization), so no overhead vs. the current state. + +NativeAOT additionally prefers small, single-purpose methods: register-allocation (LSRA) is more effective, peephole / loop-unroll / dead-code passes run faster per method, and the published native image is denser. The previous "giant SGen-emitted `ReadProperties` body" pattern was actively hostile to AOT in this respect. + +### Caveat — where NOT to hoist + +Not every inline emit is a candidate. If the inline body carries compile-time constants (`typeof(TFoo)` literal, direct `Instance.ReadProperties` call on a concrete generated reader class, `nameof(prop)` constant), hoisting forces those into runtime parameters: constant-folding opportunity lost AND a direct call may become virtual via interface dispatch. 
The `Complex` property dispatch (`Object` → new T + `ReadProperties` direct call) is in this category and should stay inline at the SGen emit site. + +Decision per primitive: can it be expressed as a context method that takes only wire-bytes-relevant inputs (no `targetType` literal, no per-property setter callback)? If yes → hoist. If no → keep inline. + +### Acceptance + +- Phase A: all shared decode primitives reachable as instance methods on `BinaryDeserializationContext`. TypeReaderTable + cross-type populate + SGen-emit all call them. SGen-generated case-body for each affected marker is ≤ 3 lines. +- Phase B: ser-side audit complete; any encode duplication closed by hoist or explicit "keep inline — see caveat" note in the SGen comment. +- Phase C: SGen-emit reader honours `Enable*Feature` flags. Verified by spot-checking generated `*.g.cs` files: an `EnableInternStringFeature=false` type's reader does NOT contain `StringInternFirstSmall` / `Medium` / `StringInterned` cases. +- Per-phase benchmark run (`Console.FullBenchmark`) confirms no hot-path regression (within noise floor). + ## ACCORE-BIN-T-S8P4: Replace JSON-in-Binary request parameters **Priority:** P1 · **Type:** Refactor · **Status:** Closed (2026-04-26, landed in commits `cdd54d3` 2026-04-05 + `3b70070` 2026-04-06) · **Related:** `../XCUT/XCUT_ISSUES.md#accore-xcut-i-x8q1` (canonical), `AyCode.Services/docs/SIGNALR/SIGNALR_TODO.md` diff --git a/nuget.config b/nuget.config new file mode 100644 index 0000000..d8cec93 --- /dev/null +++ b/nuget.config @@ -0,0 +1,28 @@ + + + + + + + +