feat(battle-node): polite Socket.IO close on waiting-room timeout
The PvP waiting-room timeout path in BattleNodeWebSocketHandler used to return immediately after RemovePending, leaving the parked first arriver to learn about the disconnect via TCP teardown after Kestrel finished draining the request. BestHTTP / socket.io-client log that as an abrupt drop rather than a controlled disconnect.

The new TryPoliteCloseAsync helper emits an EIO "1" (Close) text frame, then runs the WebSocket close handshake with NormalClosure. It is wrapped in try/catch + Debug log — teardown races between the server-side close and client disconnect are routine and not actionable. It uses a fresh 5s CTS so that an already-canceled ctx.RequestAborted doesn't skip the close.

Wired into both bail-out paths post-AcceptWebSocketAsync that previously just returned:
- PvP waiting-room timeout / Park-Park race (the main case, per PLAN.md L104 (c))
- Unknown BattleType default case (same shape; the log message already said "closing WS" but didn't actually close — opportunistic fix)

The PvpWaitingRoomTimeout integration test is tightened: it now asserts that the polite "1" text frame arrives before the close handshake, not just that the WS eventually closes by any means.

172 battle-node tests passing (was 172 before the assertion tightening; the existing timeout test stayed in).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
using System.Net.WebSockets;
|
||||
using System.Text;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using SVSim.BattleNode.Bridge;
|
||||
@@ -172,6 +174,7 @@ public sealed class BattleNodeWebSocketHandler
|
||||
"PvP waiting-room timeout or race on BattleId={Bid}; first arriver disconnected.",
|
||||
battleId);
|
||||
_store.RemovePending(battleId);
|
||||
await TryPoliteCloseAsync(ws, "waiting-room timeout", battleId);
|
||||
return;
|
||||
}
|
||||
// Retry succeeded — we're the de-facto second arriver now. Own the session.
|
||||
@@ -209,6 +212,7 @@ public sealed class BattleNodeWebSocketHandler
|
||||
|
||||
default:
|
||||
_log.LogError("Unknown BattleType={Type} for BattleId={Bid}; closing WS", pending.Type, battleId);
|
||||
await TryPoliteCloseAsync(ws, $"unknown BattleType={pending.Type}", battleId);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -219,4 +223,37 @@ public sealed class BattleNodeWebSocketHandler
|
||||
if (!string.IsNullOrEmpty(header)) return header;
|
||||
return ctx.Request.Query[name].ToString();
|
||||
}
|
||||
|
||||
/// <summary>
/// Best-effort graceful shutdown of a WebSocket that the handler has decided to abandon.
/// While the socket is still <see cref="WebSocketState.Open"/>, an EIO <c>1</c> (Close)
/// text frame is sent first so that BestHTTP / socket.io-client record a controlled
/// disconnect rather than an abrupt drop. The WebSocket close handshake is then run with
/// <see cref="WebSocketCloseStatus.NormalClosure"/> so the client does not have to wait
/// for the TCP teardown that follows Kestrel draining the request.
/// </summary>
/// <param name="ws">The accepted WebSocket to shut down.</param>
/// <param name="reason">Close description passed to the close handshake and included in the Debug log.</param>
/// <param name="battleId">Battle identifier, used only for logging.</param>
/// <remarks>
/// Never throws: every exception (socket already torn down, timeout token canceled, ...)
/// is logged at Debug level and swallowed, because teardown races are routine here.
/// </remarks>
private async Task TryPoliteCloseAsync(WebSocket ws, string reason, string battleId)
{
    // Give the close its own 5-second budget instead of reusing ctx.RequestAborted:
    // the bail-out path may already have canceled that token, which would make the
    // send/close calls give up before doing anything.
    using var closeBudget = new CancellationTokenSource(TimeSpan.FromSeconds(5));

    try
    {
        // The EIO Close frame can only be sent while our side is still fully open.
        if (ws.State == WebSocketState.Open)
        {
            var packetCode = (int)EngineIoPacketType.Close;
            var payload = Encoding.UTF8.GetBytes(packetCode.ToString());
            await ws.SendAsync(payload, WebSocketMessageType.Text, endOfMessage: true, closeBudget.Token);
        }

        // Re-read the state: the peer may have started its own close while we were sending.
        // The handshake is still valid from Open or CloseReceived; any other state is skipped.
        var stateAfterSend = ws.State;
        if (stateAfterSend == WebSocketState.Open || stateAfterSend == WebSocketState.CloseReceived)
        {
            await ws.CloseAsync(WebSocketCloseStatus.NormalClosure, reason, closeBudget.Token);
        }
    }
    catch (Exception ex)
    {
        // Deliberately swallowed: losing a race with the client's own disconnect is not actionable.
        _log.LogDebug(
            ex,
            "polite close failed on BattleId={Bid} (reason={Reason}); socket likely already torn down.",
            battleId,
            reason);
    }
}
|
||||
}
|
||||
|
||||
@@ -368,21 +368,28 @@ public class BattleNodeFlowTests
|
||||
// NOTE: ConsumeHandshakeAsync is NOT called here. The EIO Open frame is sent inside
|
||||
// RealParticipant.RunAsync, which only runs once the session is constructed by the
|
||||
// SECOND arriver. The first arriver who times out never receives that frame — the
|
||||
// handler parks them in AwaitSessionFinishedAsync, the waiting-room timer fires, the
|
||||
// handler's HTTP method returns, and the TestServer-side WS shuts down. ReceiveAsync
|
||||
// observes the shutdown either by returning a Close message or throwing.
|
||||
// handler parks them in AwaitSessionFinishedAsync, the waiting-room timer fires, and
|
||||
// the polite-close path emits an EIO "1" Close text frame followed by a clean
|
||||
// WebSocket close handshake before the handler returns.
|
||||
bool politeFrameObserved = false;
|
||||
bool closeObserved = false;
|
||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||
var buf = new byte[1024];
|
||||
while (!closeObserved && sw.Elapsed < TimeSpan.FromSeconds(65))
|
||||
{
|
||||
try
|
||||
{
|
||||
var rr = await wsA.ReceiveAsync(new ArraySegment<byte>(new byte[1024]), ct);
|
||||
var rr = await wsA.ReceiveAsync(new ArraySegment<byte>(buf), ct);
|
||||
if (rr.MessageType == System.Net.WebSockets.WebSocketMessageType.Close)
|
||||
{
|
||||
closeObserved = true;
|
||||
break;
|
||||
}
|
||||
if (rr.MessageType == System.Net.WebSockets.WebSocketMessageType.Text)
|
||||
{
|
||||
var text = System.Text.Encoding.UTF8.GetString(buf, 0, rr.Count);
|
||||
if (text == "1") politeFrameObserved = true;
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
@@ -391,6 +398,8 @@ public class BattleNodeFlowTests
|
||||
break;
|
||||
}
|
||||
}
|
||||
Assert.That(politeFrameObserved, Is.True,
|
||||
"A's WS should receive an EIO '1' Close text frame before teardown (polite-close contract).");
|
||||
Assert.That(closeObserved, Is.True,
|
||||
"A's WS should close (or ReceiveAsync should fail) after the waiting-room timeout.");
|
||||
wsA.Dispose();
|
||||
|
||||
Reference in New Issue
Block a user