using System.Net;
using System.Text.RegularExpressions;

namespace Nop.Services.Html;

/// <summary>
/// Represents the HTML formatter implementation
/// </summary>
public partial class HtmlFormatter : IHtmlFormatter
{
    #region Fields

    protected readonly IBBCodeHelper _bbCodeHelper;

    #endregion

    #region Ctor

    public HtmlFormatter(IBBCodeHelper bbCodeHelper)
    {
        _bbCodeHelper = bbCodeHelper;
    }

    #endregion

    #region Utilities

    /// <summary>
    /// Ensure only allowed HTML tags
    /// </summary>
    /// <param name="text">Text</param>
    /// <returns>
    /// Text with every tag rejected by <see cref="IsValidTag"/> removed;
    /// an empty string when <paramref name="text"/> is null or empty
    /// </returns>
    protected static string EnsureOnlyAllowedHtml(string text)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        const string allowedTags = "br,hr,b,i,u,a,div,ol,ul,li,blockquote,img,span,p,em,strong,font,pre,h1,h2,h3,h4,h5,h6,address,cite";

        // Singleline lets '.' match line breaks, so a tag split over several lines
        // (e.g. "<script\n>") is inspected instead of slipping through unmatched
        var m = Regex.Matches(text, "<.*?>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // walk the matches backwards so removing one does not shift the indexes of those still to be processed
        for (var i = m.Count - 1; i >= 0; i--)
        {
            // tag content without the leading '<' (the trailing '>' is kept and handled by IsValidTag);
            // invariant lowercasing keeps "<I>" / "<DIV>" matching the allow-list under any current culture
            var tag = text[(m[i].Index + 1)..(m[i].Index + m[i].Length)].Trim().ToLowerInvariant();

            if (!IsValidTag(tag, allowedTags))
                text = text.Remove(m[i].Index, m[i].Length);
        }

        return text;
    }

    /// <summary>
    /// Indicates whether the HTML tag is valid
    /// </summary>
    /// <param name="tag">HTML tag to check (without the leading '&lt;'); the name comparison is case-sensitive</param>
    /// <param name="tags">Comma-separated list of valid tag names</param>
    /// <returns>True - if the tag is valid, false otherwise</returns>
    protected static bool IsValidTag(string tag, string tags)
    {
        // nothing to validate (also avoids an out-of-range start index in IndexOfAny below)
        if (string.IsNullOrEmpty(tag) || string.IsNullOrEmpty(tags))
            return false;

        if (tag.Contains("javascript", StringComparison.InvariantCultureIgnoreCase))
            return false;

        if (tag.Contains("vbscript", StringComparison.InvariantCultureIgnoreCase))
            return false;

        if (tag.Contains("onclick", StringComparison.InvariantCultureIgnoreCase))
            return false;

        // reject any inline event handler attribute (onerror=, onload=, onmouseover=, ...), not only onclick
        if (Regex.IsMatch(tag, @"[\s""'/]on[a-z]+\s*=", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
            return false;

        // characters that terminate the tag name; line breaks are included because tags may span several lines
        var endChars = new[] { ' ', '>', '/', '\t', '\r', '\n', '\f' };

        // start at 1 so the leading '/' of a closing tag is not treated as a terminator
        var pos = tag.IndexOfAny(endChars, 1);
        if (pos > 0)
            tag = tag[..pos];

        if (tag[0] == '/')
            tag = tag[1..];

        return Array.IndexOf(tags.Split(','), tag) >= 0;
    }

    #endregion

    #region Methods

    /// <summary>
    /// Formats the text
    /// </summary>
    /// <param name="text">Text</param>
    /// <param name="stripTags">A value indicating whether to strip tags</param>
    /// <param name="convertPlainTextToHtml">A value indicating whether to convert plain text (line breaks, tabs, double spaces) to HTML</param>
    /// <param name="allowHtml">A value indicating whether HTML is allowed; when false the text is HTML-encoded</param>
    /// <param name="allowBBCode">A value indicating whether BBCode is allowed</param>
    /// <param name="resolveLinks">A value indicating whether to resolve links</param>
    /// <param name="addNoFollowTag">A value indicating whether to add "noFollow" tag (currently not implemented)</param>
    /// <returns>
    /// Formatted text; an empty string for null or empty input.
    /// If any step throws, an error message containing the exception message is returned instead
    /// </returns>
    public virtual string FormatText(string text, bool stripTags,
        bool convertPlainTextToHtml, bool allowHtml,
        bool allowBBCode, bool resolveLinks, bool addNoFollowTag)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        try
        {
            if (stripTags)
            {
                text = StripTags(text);
            }

            text = allowHtml ? EnsureOnlyAllowedHtml(text) : WebUtility.HtmlEncode(text);

            if (convertPlainTextToHtml)
            {
                text = ConvertPlainTextToHtml(text);
            }

            if (allowBBCode)
            {
                text = _bbCodeHelper.FormatText(text, true, true, true, true, true, true, true);
            }

            if (resolveLinks)
            {
                text = ResolveLinksHelper.FormatText(text);
            }

            if (addNoFollowTag)
            {
                //add noFollow tag. not implemented
            }
        }
        catch (Exception exc)
        {
            // deliberate best effort: formatting failures are reported in the returned text rather than thrown
            text = $"Text cannot be formatted. Error: {exc.Message}";
        }

        return text;
    }

    /// <summary>
    /// Strips tags
    /// </summary>
    /// <param name="text">Text</param>
    /// <returns>Text with tags removed and the listed HTML entities replaced by "@"; an empty string for null or empty input</returns>
    public virtual string StripTags(string text)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        // drop line breaks that sit between two adjacent tags
        text = Regex.Replace(text, @"(>)(\r|\n)*(<)", "><");
        // remove each tag, keeping the text that follows it
        text = Regex.Replace(text, "(<[^>]*>)([^<]*)", "$2");
        // replace numeric character references and the listed named entities with "@"
        text = Regex.Replace(text, "(&#x?[0-9]{2,4};|&quot;|&amp;|&nbsp;|&lt;|&gt;|&euro;|&copy;|&reg;|&permil;|&Dagger;|&dagger;|&lsaquo;|&rsaquo;|&bdquo;|&rdquo;|&ldquo;|&sbquo;|&rsquo;|&lsquo;|&mdash;|&ndash;|&rlm;|&lrm;|&zwj;|&zwnj;|&thinsp;|&emsp;|&ensp;|&tilde;|&circ;|&Yuml;|&scaron;|&Scaron;)", "@");

        return text;
    }

    /// <summary>
    /// Replace anchor text (remove a tag from the following URL <a href="http://example.com">Name</a> and output only the string "Name")
    /// </summary>
    /// <param name="text">Text</param>
    /// <returns>Text with each anchor element replaced by its inner content; an empty string for null or empty input</returns>
    public virtual string ReplaceAnchorTags(string text)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        text = Regex.Replace(text, @"<a\b[^>]+>([^<]*(?:(?!</a)<[^<]*)*)</a>", "$1", RegexOptions.IgnoreCase);

        return text;
    }

    /// <summary>
    /// Converts plain text to HTML
    /// </summary>
    /// <param name="text">Text</param>
    /// <returns>Text with line breaks replaced by br tags and tabs / double spaces replaced by non-breaking spaces; an empty string for null or empty input</returns>
    public virtual string ConvertPlainTextToHtml(string text)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        // "\r\n" must be handled first so a Windows line break yields a single <br />
        text = text.Replace("\r\n", "<br />");
        text = text.Replace("\r", "<br />");
        text = text.Replace("\n", "<br />");
        text = text.Replace("\t", "&nbsp;&nbsp;");
        text = text.Replace("  ", "&nbsp;&nbsp;");

        return text;
    }

    /// <summary>
    /// Converts HTML to plain text
    /// </summary>
    /// <param name="text">Text</param>
    /// <param name="decode">A value indicating whether to decode text</param>
    /// <param name="replaceAnchorTags">A value indicating whether to replace anchor text (remove a tag from the following URL <a href="http://example.com">Name</a> and output only the string "Name")</param>
    /// <returns>Formatted text; an empty string for null or empty input</returns>
    public virtual string ConvertHtmlToPlainText(string text,
        bool decode = false, bool replaceAnchorTags = false)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        if (decode)
            text = WebUtility.HtmlDecode(text);

        text = text.Replace("<br>", "\n");
        text = text.Replace("<br >", "\n");
        text = text.Replace("<br />", "\n");
        text = text.Replace("&nbsp;&nbsp;", "\t");
        // NOTE: the previous replacement already consumed every "&nbsp;&nbsp;", so this one never matches;
        // it is kept to mirror ConvertPlainTextToHtml
        text = text.Replace("&nbsp;&nbsp;", "  ");

        if (replaceAnchorTags)
            text = ReplaceAnchorTags(text);

        return text;
    }

    #endregion
}