اگر موارد مهم این نکات را به یک متد کمکی تبدیل کنیم، کلاس زیر به دست خواهد آمد:
با این تستها جهت بررسی آن:
using System;
using System.Web;

namespace UrlNormalizationTest
{
    /// <summary>
    /// Extension methods that reduce a URL to a canonical string so that two
    /// differently written addresses of the same resource compare as equal.
    /// </summary>
    /// <remarks>
    /// The normalization is deliberately lossy: the whole URL (path and query
    /// included) is lower-cased and percent-decoded, https is mapped to http,
    /// "www." is added to two-label hosts, and everything from the first
    /// <c>utm_source</c> query parameter onwards is dropped. The result is meant
    /// to be used as a comparison key, not as an address to request.
    /// </remarks>
    public static class UrlNormalization
    {
        private const string SchemeSeparator = "://";

        // Characters that terminate the authority (host) part of a URL.
        private static readonly char[] AuthorityTerminators = { '/', '?', '#' };

        // Characters that terminate the path part of a URL.
        private static readonly char[] PathTerminators = { '?', '#' };

        /// <summary>
        /// File names that are treated as the default document of a directory
        /// and are therefore removed when they are the last path segment.
        /// </summary>
        public static string[] DefaultDirectoryIndexes = new[]
        {
            "default.asp", "default.aspx", "index.htm", "index.html", "index.php"
        };

        /// <summary>
        /// Determines whether two absolute URL strings have the same normalized form.
        /// </summary>
        /// <param name="url1">First absolute URL.</param>
        /// <param name="url2">Second absolute URL.</param>
        /// <returns><c>true</c> when both URLs normalize to the same string.</returns>
        public static bool AreTheSameUrls(this string url1, string url2)
        {
            return string.Equals(url1.NormalizeUrl(), url2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>
        /// Determines whether two absolute URIs have the same normalized form.
        /// </summary>
        /// <param name="uri1">First absolute URI.</param>
        /// <param name="uri2">Second absolute URI.</param>
        /// <returns><c>true</c> when both URIs normalize to the same string.</returns>
        public static bool AreTheSameUrls(this Uri uri1, Uri uri2)
        {
            return string.Equals(uri1.NormalizeUrl(), uri2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>
        /// Produces the normalized comparison key for <paramref name="uri"/>.
        /// </summary>
        /// <param name="uri">An absolute URI.</param>
        /// <returns>The normalized URL string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
        public static string NormalizeUrl(this Uri uri)
        {
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }

            var url = urlToLower(uri);
            url = limitProtocols(url);

            // The fragment and the tracking parameters are stripped first so that a
            // directory index hidden behind them (".../index.html#top") is still
            // recognised as the last path segment by the next step.
            url = removeTheFragment(url);
            url = removeFeedburnerPart(url);
            url = removeDefaultDirectoryIndexes(url);
            url = removeDuplicateSlashes(url);
            url = addWww(url);
            return removeTrailingSlashAndEmptyQuery(url);
        }

        /// <summary>
        /// Produces the normalized comparison key for an absolute URL string.
        /// </summary>
        /// <param name="url">An absolute URL; it is parsed with <see cref="Uri"/>.</param>
        /// <returns>The normalized URL string.</returns>
        public static string NormalizeUrl(this string url)
        {
            return NormalizeUrl(new Uri(url));
        }

        // Cuts the URL just before the first "utm_source" query parameter; everything
        // after it (including any following parameters) is discarded.
        private static string removeFeedburnerPart(string url)
        {
            const string marker = "utm_source=";

            var idx = url.IndexOf(marker, StringComparison.Ordinal);
            while (idx > 0)
            {
                // Only a real parameter start counts, not e.g. "?xutm_source=".
                var previous = url[idx - 1];
                if (previous == '?' || previous == '&')
                {
                    return url.Substring(0, idx - 1);
                }

                idx = url.IndexOf(marker, idx + 1, StringComparison.Ordinal);
            }

            return url;
        }

        // Prefixes "www." to hosts made of exactly two labels (example.com).
        private static string addWww(string url)
        {
            var host = new Uri(url).Host;
            if (host.Split('.').Length != 2 || host.StartsWith("www.", StringComparison.Ordinal))
            {
                return url;
            }

            var schemeEnd = url.IndexOf(SchemeSeparator, StringComparison.Ordinal);
            if (schemeEnd == -1)
            {
                return url;
            }

            var authorityStart = schemeEnd + SchemeSeparator.Length;
            var authorityEnd = url.IndexOfAny(AuthorityTerminators, authorityStart);
            if (authorityEnd == -1)
            {
                authorityEnd = url.Length;
            }

            // Skip optional "user:password@" so the prefix lands on the host itself.
            // LastIndexOf yields -1 when there is no user info, which makes the offset 0.
            var authority = url.Substring(authorityStart, authorityEnd - authorityStart);
            var hostStart = authorityStart + authority.LastIndexOf('@') + 1;

            // Insert at one position only; a global Replace would also rewrite
            // URLs that appear inside the query string.
            return url.Insert(hostStart, "www.");
        }

        // Collapses every run of slashes inside the path to a single slash.
        // The scheme separator and the query string are left untouched.
        private static string removeDuplicateSlashes(string url)
        {
            var schemeEnd = url.IndexOf(SchemeSeparator, StringComparison.Ordinal);
            if (schemeEnd == -1)
            {
                return url;
            }

            var pathStart = url.IndexOfAny(AuthorityTerminators, schemeEnd + SchemeSeparator.Length);
            if (pathStart == -1 || url[pathStart] != '/')
            {
                return url;
            }

            var pathEnd = url.IndexOfAny(PathTerminators, pathStart);
            if (pathEnd == -1)
            {
                pathEnd = url.Length;
            }

            var path = url.Substring(pathStart, pathEnd - pathStart);
            if (!path.Contains("//"))
            {
                return url;
            }

            // A single Replace pass turns "///" into "//", so repeat until stable.
            while (path.Contains("//"))
            {
                path = path.Replace("//", "/");
            }

            return url.Substring(0, pathStart) + path + url.Substring(pathEnd);
        }

        // Treats https and http as the same protocol by rewriting the scheme prefix only.
        private static string limitProtocols(string url)
        {
            const string secureScheme = "https://";

            return url.StartsWith(secureScheme, StringComparison.Ordinal)
                ? "http://" + url.Substring(secureScheme.Length)
                : url;
        }

        // Drops the fragment, i.e. everything from the first '#'.
        private static string removeTheFragment(string url)
        {
            var idx = url.IndexOf('#');
            return idx == -1 ? url : url.Substring(0, idx);
        }

        // Lower-cases and percent-decodes the absolute form of the URI.
        // NOTE(review): this also lower-cases and decodes the path and query,
        // which servers may treat as case-sensitive; kept because the normalized
        // value is only used as a comparison key.
        private static string urlToLower(Uri uri)
        {
            return HttpUtility.UrlDecode(uri.AbsoluteUri.ToLowerInvariant());
        }

        // Removes a dangling "?" (empty query) and then any trailing slashes.
        private static string removeTrailingSlashAndEmptyQuery(string url)
        {
            return url.TrimEnd('?').TrimEnd('/');
        }

        // Removes a default document name when it is the complete last path segment,
        // leaving the directory's trailing slash in place.
        private static string removeDefaultDirectoryIndexes(string url)
        {
            foreach (var index in DefaultDirectoryIndexes)
            {
                // Require the leading '/' so "/appindex.php" is not mistaken for
                // "/index.php", and cut by length: TrimEnd(char[]) strips a character
                // set and can eat into the preceding segment.
                if (url.EndsWith("/" + index, StringComparison.Ordinal))
                {
                    return url.Substring(0, url.Length - index.Length);
                }
            }

            return url;
        }
    }
}
using NUnit.Framework;
using UrlNormalizationTest;

namespace UrlNormalization.Tests
{
    /// <summary>
    /// Checks that pairs of equivalent URLs are reduced to the same normalized string.
    /// </summary>
    [TestFixture]
    public class UnitTests
    {
        // Normalizes both URLs and asserts that the results are identical.
        private static void AssertSameNormalizedForm(string first, string second)
        {
            var url1 = first.NormalizeUrl();
            var url2 = second.NormalizeUrl();

            Assert.AreEqual(url1, url2);
        }

        [Test]
        public void Test1ConvertingTheSchemeAndHostToLowercase()
        {
            AssertSameNormalizedForm("HTTP://www.Example.com/", "http://www.example.com/");
        }

        [Test]
        public void Test2CapitalizingLettersInEscapeSequences()
        {
            AssertSameNormalizedForm("http://www.example.com/a%c2%b1b", "http://www.example.com/a%C2%B1b");
        }

        [Test]
        public void Test3DecodingPercentEncodedOctetsOfUnreservedCharacters()
        {
            AssertSameNormalizedForm("http://www.example.com/%7Eusername/", "http://www.example.com/~username/");
        }

        [Test]
        public void Test4RemovingTheDefaultPort()
        {
            AssertSameNormalizedForm("http://www.example.com:80/bar.html", "http://www.example.com/bar.html");
        }

        [Test]
        public void Test5AddingTrailing()
        {
            AssertSameNormalizedForm("http://www.example.com/alice", "http://www.example.com/alice/?");
        }

        [Test]
        public void Test6RemovingDotSegments()
        {
            AssertSameNormalizedForm("http://www.example.com/../a/b/../c/./d.html", "http://www.example.com/a/c/d.html");
        }

        [Test]
        public void Test7RemovingDirectoryIndex1()
        {
            AssertSameNormalizedForm("http://www.example.com/default.asp", "http://www.example.com/");
        }

        [Test]
        public void Test7RemovingDirectoryIndex2()
        {
            // A directory index followed by a real query string must be kept.
            // The expected value is a literal: comparing the URL with its own
            // normalized form could never fail.
            var url1 = "http://www.example.com/default.asp?id=1".NormalizeUrl();

            Assert.AreEqual("http://www.example.com/default.asp?id=1", url1);
        }

        [Test]
        public void Test7RemovingDirectoryIndex3()
        {
            AssertSameNormalizedForm("http://www.example.com/a/index.html", "http://www.example.com/a/");
        }

        [Test]
        public void Test8RemovingTheFragment()
        {
            AssertSameNormalizedForm("http://www.example.com/bar.html#section1", "http://www.example.com/bar.html");
        }

        [Test]
        public void Test9LimitingProtocols()
        {
            AssertSameNormalizedForm("https://www.example.com/", "http://www.example.com/");
        }

        [Test]
        public void Test10RemovingDuplicateSlashes()
        {
            AssertSameNormalizedForm("http://www.example.com/foo//bar.html", "http://www.example.com/foo/bar.html");
        }

        [Test]
        public void Test11AddWww()
        {
            AssertSameNormalizedForm("http://example.com/", "http://www.example.com");
        }

        [Test]
        public void Test12RemoveFeedburnerPart()
        {
            AssertSameNormalizedForm(
                "http://site.net/2013/02/firefox-19-released/?utm_source=rss&utm_medium=rss&utm_campaign=firefox-19-released",
                "http://site.net/2013/02/firefox-19-released");
        }
    }
}