-
Notifications
You must be signed in to change notification settings - Fork 9
feat(parser): enhance user agent parsing logic and add tests for inva… #76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
99d5f6d
feat(parser): enhance user agent parsing logic and add tests for inva…
BenjaminAbt 5f56311
fix(parser): improve browser detection logic and clean up tests
BenjaminAbt 9c7ad02
add updated bench
BenjaminAbt af6d579
re-add deleted comment
BenjaminAbt 942b99b
fix duplicate ua span
BenjaminAbt f8d2e26
remove old code
BenjaminAbt a54935d
remove old comment
BenjaminAbt bb953fc
remove old comment
BenjaminAbt File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,6 @@ namespace MyCSharp.HttpUserAgentParser; | |
| /// Parser logic for user agents | ||
| /// </summary> | ||
| public static class HttpUserAgentParser | ||
|
|
||
| { | ||
| /// <summary> | ||
| /// Parses given <param name="userAgent">user agent</param> | ||
|
|
@@ -48,7 +47,6 @@ public static HttpUserAgentInformation Parse(string userAgent) | |
| /// </summary> | ||
| public static HttpUserAgentPlatformInformation? GetPlatform(string userAgent) | ||
| { | ||
| // Fast, allocation-free token scan (keeps public statics untouched) | ||
| ReadOnlySpan<char> ua = userAgent.AsSpan(); | ||
| foreach ((string Token, string Name, HttpUserAgentPlatformType PlatformType) platform in HttpUserAgentStatics.s_platformRules) | ||
| { | ||
|
|
@@ -78,6 +76,7 @@ public static bool TryGetPlatform(string userAgent, [NotNullWhen(true)] out Http | |
| public static (string Name, string? Version)? GetBrowser(string userAgent) | ||
| { | ||
| ReadOnlySpan<char> ua = userAgent.AsSpan(); | ||
|
|
||
| foreach ((string Name, string DetectToken, string? VersionToken) browserRule in HttpUserAgentStatics.s_browserRules) | ||
| { | ||
| if (!TryIndexOf(ua, browserRule.DetectToken, out int detectIndex)) | ||
|
|
@@ -86,7 +85,18 @@ public static (string Name, string? Version)? GetBrowser(string userAgent) | |
| } | ||
|
|
||
| // Version token may differ (e.g., Safari uses "Version/") | ||
| int versionSearchStart = detectIndex; | ||
|
|
||
| int versionSearchStart; | ||
| // For rules without a specific version token, ensure pattern Token/<digits> | ||
| if (string.IsNullOrEmpty(browserRule.VersionToken)) | ||
| { | ||
| int afterDetect = detectIndex + browserRule.DetectToken.Length; | ||
| if (afterDetect >= ua.Length || ua[afterDetect] != '/') | ||
| { | ||
| // Likely a misspelling or partial token (e.g., Edgg, Oprea, Chromee) | ||
| continue; | ||
| } | ||
| } | ||
| if (!string.IsNullOrEmpty(browserRule.VersionToken)) | ||
| { | ||
| if (TryIndexOf(ua, browserRule.VersionToken!, out int vtIndex)) | ||
|
|
@@ -104,14 +114,14 @@ public static (string Name, string? Version)? GetBrowser(string userAgent) | |
| versionSearchStart = detectIndex + browserRule.DetectToken.Length; | ||
| } | ||
|
|
||
| string? version = null; | ||
| ua = ua.Slice(versionSearchStart); | ||
| if (TryExtractVersion(ua, out Range range)) | ||
| ReadOnlySpan<char> search = ua.Slice(versionSearchStart); | ||
| if (TryExtractVersion(search, out Range range)) | ||
| { | ||
| version = ua[range].ToString(); | ||
| string? version = search[range].ToString(); | ||
| return (browserRule.Name, version); | ||
| } | ||
|
|
||
| return (browserRule.Name, version); | ||
| // If we didn't find a version for this rule, try next rule | ||
| } | ||
|
|
||
| return null; | ||
|
|
@@ -198,39 +208,43 @@ private static bool TryExtractVersion(ReadOnlySpan<char> haystack, out Range ran | |
|
|
||
| // Limit search window to avoid scanning entire UA string unnecessarily | ||
| const int Window = 128; | ||
| if (haystack.Length >= Window) | ||
| if (haystack.Length > Window) | ||
| { | ||
| haystack = haystack.Slice(0, Window); | ||
| } | ||
|
|
||
| int i = 0; | ||
| for (; i < haystack.Length; ++i) | ||
| // Find first digit | ||
| int start = -1; | ||
| for (int i = 0; i < haystack.Length; i++) | ||
| { | ||
| char c = haystack[i]; | ||
| if (char.IsBetween(c, '0', '9')) | ||
| if (c >= '0' && c <= '9') | ||
| { | ||
| start = i; | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| int s = i; | ||
| haystack = haystack.Slice(i + 1); | ||
| for (i = 0; i < haystack.Length; ++i) | ||
| if (start < 0) | ||
| { | ||
| char c = haystack[i]; | ||
| if (!(char.IsBetween(c, '0', '9') || c == '.')) | ||
| { | ||
| break; | ||
| } | ||
| // No digit found => no version | ||
| return false; | ||
| } | ||
| i += s + 1; // shift back the previous domain | ||
|
|
||
| if (i == s) | ||
| // Consume digits and dots after first digit | ||
| int end = start + 1; | ||
| while (end < haystack.Length) | ||
| { | ||
| return false; | ||
| char c = haystack[end]; | ||
| if (!((c >= '0' && c <= '9') || c == '.')) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here. |
||
| { | ||
| break; | ||
| } | ||
| end++; | ||
| } | ||
|
|
||
| range = new Range(s, i); | ||
| // Create exclusive end range | ||
| range = new Range(start, end); | ||
| return true; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is two comparisons now, instead of just one. Either use the
uintcast directly, or usechar.IsBetweenwhich does the same thing but more readable IMO.Only the JIT for .NET 10+ is able to optimize that to one comparison (by using the
uintcast).