();
-
- string backslashPattern = "";
-
- foreach (char c in @"\`*_{}[]()>#+-.!/")
- {
- string key = c.ToString();
- string hash = GetHashKey(key, isHtmlBlock: false);
- _escapeTable.Add(key, hash);
- _invertedEscapeTable.Add(hash, key);
- _backslashEscapeTable.Add(@"\" + key, hash);
- backslashPattern += Regex.Escape(@"\" + key) + "|";
- }
-
- _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled);
- }
-
- /// <summary>
- /// current version of MarkdownSharp;
- /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute
- /// </summary>
- public string Version
- {
- get { return _version; } // read-only view of the _version field
- }
-
- /// <summary>
- /// Converts the supplied Markdown-formatted text into HTML;
- /// see http://en.wikipedia.org/wiki/Markdown
- /// </summary>
- /// <remarks>
- /// The order of the calls below is essential. Link and image substitutions
- /// need to happen before EscapeSpecialChars(), so that any *'s or _'s in the
- /// a and img tags get encoded.
- /// </remarks>
- /// <param name="text">Markdown source; null or empty input yields an empty string.</param>
- /// <returns>The transformed text followed by a single newline.</returns>
- public string Transform(string text)
- {
-     if (String.IsNullOrEmpty(text))
-     {
-         return "";
-     }
-
-     Setup();
-
-     string normalized = Normalize(text);
-     string withHashedHtml = HashHTMLBlocks(normalized);
-     string withoutLinkDefinitions = StripLinkDefinitions(withHashedHtml);
-     string html = Unescape(RunBlockGamut(withoutLinkDefinitions));
-
-     Cleanup();
-
-     return html + "\n";
- }
-
-
- /// <summary>
- /// Runs the block-level transformations (headers, horizontal rules, lists,
- /// code blocks, block quotes) and then forms paragraphs from the result.
- /// </summary>
- /// <param name="text">Text to transform.</param>
- /// <param name="unhash">Forwarded to FormParagraphs().</param>
- private string RunBlockGamut(string text, bool unhash = true)
- {
-     // Innermost call runs first: headers, rules, lists, code blocks, block quotes.
-     string blocks = DoBlockQuotes(DoCodeBlocks(DoLists(DoHorizontalRules(DoHeaders(text)))));
-
-     // HashHTMLBlocks() already ran once in Transform(), but that was to escape
-     // raw HTML in the original Markdown source. This pass escapes the markup
-     // just created, so that paragraph tags are not wrapped around block-level tags.
-     string hashed = HashHTMLBlocks(blocks);
-
-     return FormParagraphs(hashed, unhash: unhash);
- }
-
-
- /// <summary>
- /// Runs the transformations that occur *within* block-level tags like paragraphs, headers, and list items.
- /// </summary>
- private string RunSpanGamut(string text)
- {
-     string span = EscapeBackslashes(EscapeSpecialCharsWithinTagAttributes(DoCodeSpans(text)));
-
-     // Images are handled before anchors, because ![foo][f] looks like an anchor.
-     span = DoAnchors(DoImages(span));
-
-     // Auto-links are handled after DoAnchors(), because < and > can be used as
-     // delimiters in inline links.
-     span = DoAutoLinks(span);
-
-     // Put back the "://" sequences that SaveFromAutoLinking() replaced with the marker.
-     span = span.Replace(AutoLinkPreventionMarker, "://");
-
-     return DoHardBreaks(DoItalicsAndBold(EncodeAmpsAndAngles(span)));
- }
-
- private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled); // newline runs at the very start or very end of the text
- private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled); // paragraph separator: two or more newlines
- private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled); // spaces at the start of the input
-
- private static Regex _htmlBlockHash = new Regex("\x1AH\\d+H", RegexOptions.Compiled); // keys shaped like GetHashKey(..., isHtmlBlock: true) output
-
- /// <summary>
- /// splits on two or more newlines, to form "paragraphs";
- /// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off) or wrapped in HTML p tag
- /// </summary>
- /// <param name="text">Block-level text in which HTML blocks have been replaced by hash keys.</param>
- /// <param name="unhash">When true, hash keys are expanded back into the stored HTML blocks.</param>
- /// <returns>The processed paragraphs joined by a blank line.</returns>
- private string FormParagraphs(string text, bool unhash = true)
- {
-     // split on two or more newlines
-     string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));
-
-     for (int i = 0; i < grafs.Length; i++)
-     {
-         // Ordinal comparison: the marker begins with the control character \x1A,
-         // which a culture-sensitive comparison may treat as ignorable.
-         if (grafs[i].StartsWith("\x1AH", StringComparison.Ordinal))
-         {
-             // unhashify HTML blocks
-             if (unhash)
-             {
-                 int sanityCheck = 50; // just for safety, guard against an infinite loop
-                 bool keepGoing = true; // as long as replacements were made, keep going
-                 while (keepGoing && sanityCheck > 0)
-                 {
-                     keepGoing = false;
-                     grafs[i] = _htmlBlockHash.Replace(grafs[i], match =>
-                     {
-                         keepGoing = true;
-                         return _htmlBlocks[match.Value];
-                     });
-                     sanityCheck--;
-                 }
-                 // If keepGoing is still true here, the 50-pass limit was hit; the
-                 // paragraph is left in its partially expanded state.
-             }
-         }
-         else
-         {
-             // do span level processing inside the block, then wrap result in <p> tags
-             grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
-         }
-     }
-
-     return string.Join("\n\n", grafs);
- }
-
-
- // Resets the per-document state. Without this, link definitions and hashed
- // HTML blocks from one article would leak into the next when a page holds
- // more than one article (e.g. an index page showing the N most recent articles).
- private void Setup()
- {
-     _listLevel = 0;
-     _htmlBlocks.Clear();
-     _titles.Clear();
-     _urls.Clear();
- }
-
- private void Cleanup() // clears the same per-document state as Setup()
- {
- Setup();
- }
-
- private static string _nestedBracketsPattern; // cache filled by GetNestedBracketsPattern()
-
- /// <summary>
- /// Reusable pattern to match balanced [brackets]. See Friedl's
- /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
- /// </summary>
- private static string GetNestedBracketsPattern()
- {
- // in other words [this] and [this[also]] and [this[also[too]]]
- // up to _nestDepth
- if (_nestedBracketsPattern == null) // built only on the first call
- _nestedBracketsPattern =
- RepeatString(@"
- (?> # Atomic matching
- [^\[\]]+ # Anything other than brackets
- |
- \[
- ", _nestDepth) + RepeatString(
- @" \]
- )*"
- , _nestDepth);
- return _nestedBracketsPattern;
- }
-
- private static string _nestedParensPattern; // cache filled by GetNestedParensPattern()
-
- /// <summary>
- /// Reusable pattern to match balanced (parens). See Friedl's
- /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
- /// </summary>
- private static string GetNestedParensPattern()
- {
- // in other words (this) and (this(also)) and (this(also(too)))
- // up to _nestDepth
- if (_nestedParensPattern == null) // built only on the first call
- _nestedParensPattern =
- RepeatString(@"
- (?> # Atomic matching
- [^()\s]+ # Anything other than parens or whitespace
- |
- \(
- ", _nestDepth) + RepeatString(
- @" \)
- )*"
- , _nestDepth);
- return _nestedParensPattern;
- }
-
- // Matches one link definition of the form  [id]: url "optional title".
- // The URL may be wrapped in angle brackets; both brackets are optional and
- // are kept out of the captured URL (group 2).
- private static readonly Regex _linkDef = new Regex(string.Format(@"
- ^[ ]{{0,{0}}}\[([^\[\]]+)\]: # id = $1
- [ ]*
- \n? # maybe *one* newline
- [ ]*
- <?(\S+?)>? # url = $2
- [ ]*
- \n? # maybe one newline
- [ ]*
- (?:
- (?<=\s) # lookbehind for whitespace
- [""(]
- (.+?) # title = $3
- ["")]
- [ ]*
- )? # title is optional
- (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
-
- /// <summary>
- /// Removes link definitions from the text; LinkEvaluator() records each URL and title by id.
- /// </summary>
- /// <remarks>
- /// ^[id]: url "optional title"
- /// </remarks>
- private string StripLinkDefinitions(string text)
- {
-     MatchEvaluator recordDefinition = LinkEvaluator;
-     return _linkDef.Replace(text, recordDefinition);
- }
-
- // Records one link definition: stores the URL (and the title, when present)
- // under the lower-cased id, and returns "" so the definition is removed from the text.
- private string LinkEvaluator(Match match)
- {
-     string linkID = match.Groups[1].Value.ToLowerInvariant();
-     _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);
-
-     // The title group is optional. Groups[n] never returns null, so the
-     // captured length is the only meaningful check.
-     Group title = match.Groups[3];
-     if (title.Length > 0)
-     {
-         // Double quotes are stored as entities.
-         _titles[linkID] = title.Value.Replace("\"", "&quot;");
-     }
-
-     return "";
- }
-
- // Deliberately created without RegexOptions.Compiled: per the original author, compiling this monster regex results in worse performance.
- private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
-
-
- /// <summary>
- /// derived pretty much verbatim from PHP Markdown
- /// </summary>
- private static string GetBlockPattern()
- {
-
- // Hashify HTML blocks:
- // We only want to do this for block-level HTML tags, such as headers,
- // lists, and tables. That's because we still want to wrap <p>s around
- // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
- // phrase emphasis, and spans. The list of tags we're looking for is
- // hard-coded:
- //
- // * List "a" is made of tags which can be both inline or block-level.
- // These will be treated block-level when the start tag is alone on
- // its line, otherwise they're not matched here and will be taken as
- // inline later.
- // * List "b" is made of tags which are always block-level;
- //
- string blockTagsA = "ins|del";
- string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";
-
- // Regular expression for the content of a block tag.
- string attr = @"
- (?> # optional tag attributes
- \s # starts with whitespace
- (?>
- [^>""/]+ # text outside quotes
- |
- /+(?!>) # slash not followed by >
- |
- ""[^""]*"" # text inside double quotes (tolerate >)
- |
- '[^']*' # text inside single quotes (tolerate >)
- )*
- )?
- ";
-
- string content = RepeatString(@"
- (?>
- [^<]+ # content without tag
- |
- <\2 # nested opening tag
- " + attr + @" # attributes
- (?>
- />
- |
- >", _nestDepth) + // end of opening tag
- ".*?" + // last level nested tag content
- RepeatString(@"
- \2\s*> # closing nested tag
- )
- |
- <(?!/\2\s*> # other tags with a different name
- )
- )*", _nestDepth);
-
- string content2 = content.Replace(@"\2", @"\3"); // same content pattern, retargeted at capture group 3 (the "a" tags)
-
- // First, look for nested blocks, e.g.:
- // <div>
- // <div>
- // tags for inner block must be indented.
- // </div>
- // </div>
- //
- // The outermost tags must start at the left margin for this to match, and
- // the inner nested divs must be indented.
- // We need to do this before the next, more liberal match, because the next
- // match will start at the first `<div>` and stop at the first `</div>
`.
- string pattern = @"
- (?>
- (?>
- (?<=\n) # Starting at the beginning of a line
- | # or
- \A\n? # the beginning of the doc
- )
- ( # save in $1
-
- # Match from `\n` to `\n`, handling nested tags
- # in between.
-
- <($block_tags_b_re) # start tag = $2
- $attr> # attributes followed by > and \n
- $content # content, support nesting
- \2> # the matching end tag
- [ ]* # trailing spaces
- (?=\n+|\Z) # followed by a newline or end of document
-
- | # Special version for tags of group a.
-
- <($block_tags_a_re) # start tag = $3
- $attr>[ ]*\n # attributes followed by >
- $content2 # content, support nesting
- \3> # the matching end tag
- [ ]* # trailing spaces
- (?=\n+|\Z) # followed by a newline or end of document
-
- | # Special case just for
. It was easier to make a special
- # case than to make the other regex more complicated.
-
- [ ]{0,$less_than_tab}
-
# the matching end tag
- [ ]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
-
- | # Special case for standalone HTML comments:
-
- (?<=\n\n|\A) # preceded by a blank line or start of document
- [ ]{0,$less_than_tab}
- (?s:
-
- )
- [ ]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
-
- | # PHP and ASP-style processor instructions ( and <%)
-
- [ ]{0,$less_than_tab}
- (?s:
- <([?%]) # $4
- .*?
- \4>
- )
- [ ]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
-
- )
- )";
-
- pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString());
- pattern = pattern.Replace("$block_tags_b_re", blockTagsB);
- pattern = pattern.Replace("$block_tags_a_re", blockTagsA);
- pattern = pattern.Replace("$attr", attr);
- pattern = pattern.Replace("$content2", content2); // must run before "$content", which is a prefix of "$content2"
- pattern = pattern.Replace("$content", content);
-
- return pattern;
- }
-
- /// <summary>
- /// Replaces every block-level HTML block matched by _blocksHtml with a hash key (see HtmlEvaluator()).
- /// </summary>
- private string HashHTMLBlocks(string text)
- {
-     MatchEvaluator hashBlock = HtmlEvaluator;
-     return _blocksHtml.Replace(text, hashBlock);
- }
-
- // Stores the matched HTML block under its hash key and returns the key,
- // surrounded by blank lines, as the replacement text.
- private string HtmlEvaluator(Match match)
- {
-     string html = match.Groups[1].Value;
-     string hashKey = GetHashKey(html, isHtmlBlock: true);
-     _htmlBlocks[hashKey] = html;
-
-     return "\n\n" + hashKey + "\n\n";
- }
-
- // Builds the placeholder key for a piece of text: \x1A, a delimiter letter
- // ('H' for HTML blocks, 'E' for escapes), the absolute value of the string's
- // hash code in decimal, and the delimiter letter again.
- private static string GetHashKey(string s, bool isHtmlBlock)
- {
-     var delim = isHtmlBlock ? 'H' : 'E';
-
-     // Widen to long before Math.Abs: Math.Abs(int.MinValue) throws
-     // OverflowException. For every other hash code the result is unchanged.
-     long magnitude = Math.Abs((long)s.GetHashCode());
-
-     return "\x1A" + delim + magnitude.ToString() + delim;
- }
-
- private static Regex _htmlTokens = new Regex(@"
- ()| # match
- (<\?.*?\?>)| # match " +
- RepeatString(@"
- (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
- " # match and ",
- RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // NOTE(review): pattern text above looks truncated (empty first alternative) - confirm against upstream
-
- /// <summary>
- /// Splits the input into a list of tokens. Each token is either a tag matched
- /// by _htmlTokens (possibly with nested tags contained therein, such as
- /// &lt;a href="&lt;MTFoo&gt;"&gt;), or a run of text between tags.
- /// </summary>
- /// <param name="text">The text to tokenize.</param>
- /// <returns>Tokens in document order; concatenating their values reproduces the input.</returns>
- private List<Token> TokenizeHTML(string text)
- {
-     int pos = 0;
-     var tokens = new List<Token>();
-
-     // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
-     // http://www.bradchoate.com/past/mtregex.php
-     foreach (Match m in _htmlTokens.Matches(text))
-     {
-         int tagStart = m.Index;
-
-         // Text between the previous tag (or the start) and this tag.
-         if (pos < tagStart)
-         {
-             tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos)));
-         }
-
-         tokens.Add(new Token(TokenType.Tag, m.Value));
-         pos = tagStart + m.Length;
-     }
-
-     // Trailing text after the last tag.
-     if (pos < text.Length)
-     {
-         tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos)));
-     }
-
-     return tokens;
- }
-
-
- private static Regex _anchorRef = new Regex(string.Format(@"
- ( # wrap whole match in $1
- \[
- ({0}) # link text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
- )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // reference-style links: [text][id]
-
- private static Regex _anchorInline = new Regex(string.Format(@"
- ( # wrap whole match in $1
- \[
- ({0}) # link text = $2
- \]
- \( # literal paren
- [ ]*
- ({1}) # href = $3
- [ ]*
- ( # $4
- (['""]) # quote char = $5
- (.*?) # title = $6
- \5 # matching quote
- [ ]* # ignore any spaces between closing quote and )
- )? # title is optional
- \)
- )", GetNestedBracketsPattern(), GetNestedParensPattern()),
- RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // inline links: [text](href "title")
-
- private static Regex _anchorRefShortcut = new Regex(@"
- ( # wrap whole match in $1
- \[
- ([^\[\]]+) # link text = $2; can't contain [ or ]
- \]
- )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // shortcut references: [text]
-
- /// <summary>
- /// Turn Markdown link shortcuts into HTML anchor tags
- /// </summary>
- /// <remarks>
- /// [link text](url "title")
- /// [link text][id]
- /// [id]
- /// </remarks>
- private string DoAnchors(string text)
- {
-     // Pass 1: reference-style links, [link text] [id]
-     string result = _anchorRef.Replace(text, AnchorRefEvaluator);
-
-     // Pass 2: inline-style links, [link text](url "optional title")
-     result = _anchorInline.Replace(result, AnchorInlineEvaluator);
-
-     // Pass 3: reference-style shortcuts, [link text].
-     // This pass runs last so it does not consume [link test][1] or [link test](/foo).
-     return _anchorRefShortcut.Replace(result, AnchorRefShortcutEvaluator);
- }
-
- private string SaveFromAutoLinking(string s) // hides "://" behind a marker; RunSpanGamut() swaps the marker back after DoAutoLinks()
- {
- return s.Replace("://", AutoLinkPreventionMarker);
- }
-
- // Builds the anchor for a reference-style link [text][id]. When the id has no
- // stored URL, the original Markdown is returned unchanged.
- private string AnchorRefEvaluator(Match match)
- {
-     string wholeMatch = match.Groups[1].Value;
-     string linkText = SaveFromAutoLinking(match.Groups[2].Value);
-     string linkID = match.Groups[3].Value.ToLowerInvariant();
-
-     // for shortcut links like [this][].
-     if (linkID == "")
-     {
-         linkID = linkText.ToLowerInvariant();
-     }
-
-     if (!_urls.ContainsKey(linkID))
-     {
-         return wholeMatch;
-     }
-
-     string url = EscapeBoldItalic(EncodeProblemUrlChars(_urls[linkID]));
-     string result = "<a href=\"" + url + "\"";
-
-     if (_titles.ContainsKey(linkID))
-     {
-         // NOTE(review): the markup in this method was reconstructed after the
-         // tags were stripped from the source; confirm the title encoding
-         // against the upstream implementation.
-         string title = EscapeBoldItalic(AttributeEncode(_titles[linkID]));
-         result += " title=\"" + title + "\"";
-     }
-
-     return result + ">" + linkText + "</a>";
- }
-
- // Builds the anchor for a shortcut reference [text], where the link text
- // doubles as the id. When the id has no stored URL, the original Markdown is
- // returned unchanged.
- private string AnchorRefShortcutEvaluator(Match match)
- {
-     string wholeMatch = match.Groups[1].Value;
-     string linkText = SaveFromAutoLinking(match.Groups[2].Value);
-     string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " "); // lower case and remove newlines / extra spaces
-
-     if (!_urls.ContainsKey(linkID))
-     {
-         return wholeMatch;
-     }
-
-     string url = EscapeBoldItalic(EncodeProblemUrlChars(_urls[linkID]));
-     string result = "<a href=\"" + url + "\"";
-
-     if (_titles.ContainsKey(linkID))
-     {
-         // NOTE(review): the markup in this method was reconstructed after the
-         // tags were stripped from the source; confirm the title encoding
-         // against the upstream implementation.
-         string title = EscapeBoldItalic(AttributeEncode(_titles[linkID]));
-         result += " title=\"" + title + "\"";
-     }
-
-     return result + ">" + linkText + "</a>";
- }
-
-
- // Builds the anchor for an inline link [text](url "optional title").
- private string AnchorInlineEvaluator(Match match)
- {
-     string linkText = SaveFromAutoLinking(match.Groups[2].Value);
-     string url = match.Groups[3].Value;
-     string title = match.Groups[6].Value;
-
-     url = EncodeProblemUrlChars(url);
-     url = EscapeBoldItalic(url);
-     if (url.StartsWith("<", StringComparison.Ordinal) && url.EndsWith(">", StringComparison.Ordinal))
-     {
-         url = url.Substring(1, url.Length - 2); // remove <>'s surrounding URL, if present
-     }
-
-     string result = "<a href=\"" + url + "\"";
-
-     if (!String.IsNullOrEmpty(title))
-     {
-         // NOTE(review): the markup in this method was reconstructed after the
-         // tags were stripped from the source; confirm the title encoding
-         // against the upstream implementation.
-         title = EscapeBoldItalic(AttributeEncode(title));
-         result += " title=\"" + title + "\"";
-     }
-
-     return result + ">" + linkText + "</a>";
- }
-
- private static Regex _imagesRef = new Regex(@"
- ( # wrap whole match in $1
- !\[
- (.*?) # alt text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
-
- )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); // reference-style images: ![alt][id]
-
- private static Regex _imagesInline = new Regex(String.Format(@"
- ( # wrap whole match in $1
- !\[
- (.*?) # alt text = $2
- \]
- \s? # one optional whitespace character
- \( # literal paren
- [ ]*
- ({0}) # href = $3
- [ ]*
- ( # $4
- (['""]) # quote char = $5
- (.*?) # title = $6
- \5 # matching quote
- [ ]*
- )? # title is optional
- \)
- )", GetNestedParensPattern()),
- RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); // inline images: ![alt](href "title")
-
- /// <summary>
- /// Turn Markdown image shortcuts into HTML img tags.
- /// </summary>
- /// <remarks>
- /// ![alt text][id]
- /// ![alt text](url "optional title")
- /// </remarks>
- private string DoImages(string text)
- {
-     // Reference-style labeled images first: ![alt text][id]
-     string withReferenceImages = _imagesRef.Replace(text, ImageReferenceEvaluator);
-
-     // Then inline images: ![alt text](url "optional title")
-     // Don't forget: encode * and _
-     return _imagesInline.Replace(withReferenceImages, ImageInlineEvaluator);
- }
-
- // Escapes bold/italic markers plus brackets and parentheses in image alt text.
- // This prevents horribly broken HTML when some syntax ambiguities collide. The
- // result may still not be what the user meant, but at least it is not garbage.
- private string EscapeImageAltText(string s)
- {
-     string escaped = EscapeBoldItalic(s);
-     return Regex.Replace(escaped, @"[\[\]()]", m => _escapeTable[m.Value]);
- }
-
- // Builds the img tag for a reference-style image ![alt][id]. When the id has
- // no stored URL, the original Markdown is returned unchanged.
- private string ImageReferenceEvaluator(Match match)
- {
-     string altText = match.Groups[2].Value;
-     string linkID = match.Groups[3].Value.ToLowerInvariant();
-
-     // for shortcut links like ![this][].
-     if (linkID == "")
-     {
-         linkID = altText.ToLowerInvariant();
-     }
-
-     // If there's no such link ID, leave intact:
-     if (!_urls.ContainsKey(linkID))
-     {
-         return match.Groups[1].Value;
-     }
-
-     string title = _titles.ContainsKey(linkID) ? _titles[linkID] : null;
-     return ImageTag(_urls[linkID], altText, title);
- }
-
- // Builds the img tag for an inline image ![alt](url "optional title").
- private string ImageInlineEvaluator(Match match)
- {
-     string url = match.Groups[3].Value;
-
-     // Remove <>'s surrounding URL, if present
-     bool hasAngleBrackets = url.StartsWith("<") && url.EndsWith(">");
-     if (hasAngleBrackets)
-     {
-         url = url.Substring(1, url.Length - 2);
-     }
-
-     return ImageTag(url, match.Groups[2].Value, match.Groups[6].Value);
- }
-
- private string ImageTag(string url, string altText, string title)
- {
- altText = EscapeImageAltText(AttributeEncode(altText));
- url = EncodeProblemUrlChars(url);
- url = EscapeBoldItalic(url);
- var result = string.Format("
- /// Turn Markdown headers into HTML header tags
- ///
- ///
- /// Header 1
- /// ========
- ///
- /// Header 2
- /// --------
- ///
- /// # Header 1
- /// ## Header 2
- /// ## Header 2 with closing hashes ##
- /// ...
- /// ###### Header 6
- ///
- private string DoHeaders(string text)
- {
-     // Setext-style (underlined) headers are replaced first, then atx-style (# prefixed) ones.
-     string withSetextHeaders = _headerSetext.Replace(text, SetextHeaderEvaluator);
-     return _headerAtx.Replace(withSetextHeaders, AtxHeaderEvaluator);
- }
-
- // Builds the header tag for a Setext-style header: level 1 when the underline
- // starts with "=", level 2 otherwise.
- private string SetextHeaderEvaluator(Match match)
- {
-     string header = match.Groups[1].Value;
-     int level = match.Groups[2].Value.StartsWith("=", StringComparison.Ordinal) ? 1 : 2;
-     return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(header), level);
- }
-
- // Builds the header tag for an atx-style header; the level is the length of
- // the first capture group (the run of leading # characters).
- private string AtxHeaderEvaluator(Match match)
- {
-     string header = match.Groups[2].Value;
-     int level = match.Groups[1].Value.Length;
-     return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(header), level);
- }
-
-
- private static Regex _horizontalRules = new Regex(@"
- ^[ ]{0,3} # Leading space
- ([-*_]) # $1: First marker
- (?> # Repeated marker group
- [ ]{0,2} # Zero, one, or two spaces.
- \1 # Marker character
- ){2,} # Group repeated at least twice
- [ ]* # Trailing spaces
- $ # End of line.
- ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // a line of three or more identical -, * or _ markers
-
- ///
- /// Turn Markdown horizontal rules into HTML hr tags
- ///
- ///
- /// ***
- /// * * *
- /// ---
- /// - - -
- ///
- private string DoHorizontalRules(string text)
- {
- return _horizontalRules.Replace(text, "
- /// Turn Markdown lists into HTML ul and ol and li tags
- ///
- private string DoLists(string text, bool isInsideParagraphlessListItem = false)
- {
-     // Nested lists use a different pattern than top-level lists; _listLevel
-     // is above zero while ProcessListItems() is running.
-     bool isNested = _listLevel > 0;
-
-     return isNested
-         ? _listNested.Replace(text, GetListEvaluator(isInsideParagraphlessListItem))
-         : _listTopLevel.Replace(text, GetListEvaluator(false));
- }
-
- // Returns the evaluator that turns one matched list into a ul or ol element.
- // The list type is chosen by testing capture group 3 (the first marker)
- // against the unordered-marker pattern.
- private MatchEvaluator GetListEvaluator(bool isInsideParagraphlessListItem = false)
- {
-     return new MatchEvaluator(match =>
-     {
-         string list = match.Groups[1].Value;
-         bool isUnordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);
-         string listType = isUnordered ? "ul" : "ol";
-
-         string items = ProcessListItems(list, isUnordered ? _markerUL : _markerOL, isInsideParagraphlessListItem);
-
-         // Opening tag, the processed items, then the matching closing tag.
-         return string.Format("<{0}>\n{1}</{0}>\n", listType, items);
-     });
- }
-
- /// <summary>
- /// Process the contents of a single ordered or unordered list, splitting it
- /// into individual list items.
- /// </summary>
- /// <param name="list">The raw text of the list.</param>
- /// <param name="marker">Regex fragment matching this list's item marker.</param>
- /// <param name="isInsideParagraphlessListItem">True when called for a sub-list of an item that is not wrapped in paragraphs; span processing is then left to the outer item.</param>
- /// <returns>The list text with each item wrapped in li tags.</returns>
- private string ProcessListItems(string list, string marker, bool isInsideParagraphlessListItem = false)
- {
-     // The listLevel global keeps track of when we're inside a list.
-     // Each time we enter a list, we increment it; when we leave a list,
-     // we decrement. If it's zero, we're not in a list anymore.
-
-     // We do this because when we're not inside a list, we want to treat
-     // something like this:
-
-     // I recommend upgrading to version
-     // 8. Oops, now this line is treated
-     // as a sub-list.
-
-     // As a single paragraph, despite the fact that the second line starts
-     // with a digit-period-space sequence.
-
-     // Whereas when we're inside a list (or sub-list), that line will be
-     // treated as the start of a sub-list. What a kludge, huh? This is
-     // an aspect of Markdown's syntax that's hard to parse perfectly
-     // without resorting to mind-reading. Perhaps the solution is to
-     // change the syntax rules such that sub-lists must start with a
-     // starting cardinal number; e.g. "1." or "a.".
-
-     _listLevel++;
-
-     // Trim trailing blank lines:
-     list = Regex.Replace(list, @"\n{2,}\z", "\n");
-
-     string pattern = string.Format(
- @"(^[ ]*) # leading whitespace = $1
- ({0}) [ ]+ # list marker = $2
- ((?s:.+?) # list item text = $3
- (\n+))
- (?= (\z | \1 ({0}) [ ]+))", marker);
-
-     bool lastItemHadADoubleNewline = false;
-
-     // has to be a closure, so subsequent invocations can share the bool
-     MatchEvaluator ListItemEvaluator = (Match match) =>
-     {
-         string item = match.Groups[3].Value;
-
-         bool endsWithDoubleNewline = item.EndsWith("\n\n", StringComparison.Ordinal);
-         bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n");
-
-         if (containsDoubleNewline || lastItemHadADoubleNewline)
-         {
-             // we could correct any bad indentation here..
-             item = RunBlockGamut(Outdent(item) + "\n", unhash: false);
-         }
-         else
-         {
-             // recursion for sub-lists
-             item = DoLists(Outdent(item), isInsideParagraphlessListItem: true);
-             item = item.TrimEnd('\n');
-             if (!isInsideParagraphlessListItem) // only the outer-most item should run this, otherwise it's run multiple times for the inner ones
-             {
-                 item = RunSpanGamut(item);
-             }
-         }
-         lastItemHadADoubleNewline = endsWithDoubleNewline;
-         return string.Format("<li>{0}</li>\n", item);
-     };
-
-     list = Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator),
-         RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
-     _listLevel--;
-     return list;
- }
-
- private static Regex _codeBlock = new Regex(string.Format(@"
- (?:\n\n|\A\n?)
- ( # $1 = the code block -- one or more lines, starting with a space
- (?:
- (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces
- .*\n+
- )+
- )
- ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc",
- _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // {0} is filled with _tabWidth
-
- /// <summary>
- /// Turn Markdown 4-space indented code into HTML pre code blocks
- /// </summary>
- private string DoCodeBlocks(string text)
- {
-     MatchEvaluator renderBlock = CodeBlockEvaluator;
-     return _codeBlock.Replace(text, renderBlock);
- }
-
- // Renders one indented code block: outdents it, encodes it with EncodeCode(),
- // strips leading and trailing newlines, and wraps it in pre/code tags
- // surrounded by blank lines.
- private string CodeBlockEvaluator(Match match)
- {
-     string codeBlock = match.Groups[1].Value;
-
-     codeBlock = EncodeCode(Outdent(codeBlock));
-     codeBlock = _newlinesLeadingTrailing.Replace(codeBlock, "");
-
-     return string.Concat("\n\n<pre><code>", codeBlock, "\n</code></pre>\n\n");
- }
-
- private static Regex _codeSpan = new Regex(@"
- (?
- /// Turn Markdown `code spans` into HTML code tags
- ///
- private string DoCodeSpans(string text)
- {
-     // * You can use multiple backticks as the delimiters if you want to
-     //   include literal backticks in the code span. So, this input:
-     //
-     //       Just type ``foo `bar` baz`` at the prompt.
-     //
-     //   Will translate to:
-     //
-     //       <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
-     //
-     //   There's no arbitrary limit to the number of backticks you
-     //   can use as delimiters. If you need three consecutive backticks
-     //   in your code, use four for delimiters, etc.
-     //
-     // * You can use spaces to get literal backticks at the edges:
-     //
-     //       ... type `` `bar` `` ...
-     //
-     //   Turns to:
-     //
-     //       ... type <code>`bar`</code> ...
-     //
-
-     return _codeSpan.Replace(text, new MatchEvaluator(CodeSpanEvaluator));
- }
-
- // Renders one code span: trims spaces at both ends of capture group 2,
- // encodes it with EncodeCode(), and wraps it in code tags.
- private string CodeSpanEvaluator(Match match)
- {
-     string span = match.Groups[2].Value;
-     span = Regex.Replace(span, @"^[ ]*", ""); // leading whitespace
-     span = Regex.Replace(span, @"[ ]*$", ""); // trailing whitespace
-     span = EncodeCode(span);
-     span = SaveFromAutoLinking(span); // to prevent auto-linking. Not necessary in code *blocks*, but in code spans.
-
-     return string.Concat("<code>", span, "</code>");
- }
-
-
- private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
- RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); // $2 = bold text between ** or __ pairs
- private static Regex _strictBold = new Regex(@"(^|[\W_])(?:(?!\1)|(?=^))(\*|_)\2(?=\S)(.*?\S)\2\2(?!\2)(?=[\W_]|$)",
- RegexOptions.Singleline | RegexOptions.Compiled); // $1 = preceding character, $3 = bold text; used when _strictBoldItalic is set
-
- private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
- RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); // $2 = italic text between single * or _
- private static Regex _strictItalic = new Regex(@"(^|[\W_])(?:(?!\1)|(?=^))(\*|_)(?=\S)((?:(?!\2).)*?\S)\2(?!\2)(?=[\W_]|$)",
- RegexOptions.Singleline | RegexOptions.Compiled); // $1 = preceding character, $3 = italic text; used when _strictBoldItalic is set
-
- /// <summary>
- /// Turn Markdown *italics* and **bold** into HTML strong and em tags
- /// </summary>
- private string DoItalicsAndBold(string text)
- {
-     // <strong> must go first, then <em>
-     if (_strictBoldItalic)
-     {
-         // The strict patterns also capture the preceding character ($1), which is kept.
-         text = _strictBold.Replace(text, "$1<strong>$3</strong>");
-         text = _strictItalic.Replace(text, "$1<em>$3</em>");
-     }
-     else
-     {
-         text = _bold.Replace(text, "<strong>$2</strong>");
-         text = _italic.Replace(text, "<em>$2</em>");
-     }
-     return text;
- }
-
- ///
- /// Turn markdown line breaks (two space at end of line) into HTML break tags
- ///
- private string DoHardBreaks(string text)
- {
- if (_autoNewlines)
- text = Regex.Replace(text, @"\n", string.Format("
[ ]? # '>' at the start of a line
- .+\n # rest of the first line
- (.+\n)* # subsequent consecutive lines
- \n* # blanks
- )+
- )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);
-
- /// <summary>
- /// Turn Markdown > quoted blocks into HTML blockquote blocks
- /// </summary>
- private string DoBlockQuotes(string text)
- {
-     MatchEvaluator renderQuote = BlockQuoteEvaluator;
-     return _blockquote.Replace(text, renderQuote);
- }
-
- // Renders one block quote: strips one level of quoting, runs the block gamut
- // on the content, wraps it in blockquote tags, and returns a hash key for the
- // stored result so later passes treat it as an opaque HTML block.
- private string BlockQuoteEvaluator(Match match)
- {
-     string bq = match.Groups[1].Value;
-
-     bq = Regex.Replace(bq, @"^[ ]*>[ ]?", "", RegexOptions.Multiline); // trim one level of quoting
-     bq = Regex.Replace(bq, @"^[ ]+$", "", RegexOptions.Multiline); // trim whitespace-only lines
-     bq = RunBlockGamut(bq); // recurse
-
-     // Indent every line of the quoted content.
-     bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline);
-
-     // These leading spaces screw with <pre> content, so we need to fix that:
-     bq = Regex.Replace(bq, @"(\s*<pre>.+?</pre>)", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
-
-     bq = string.Format("<blockquote>\n{0}\n</blockquote>", bq);
-     string key = GetHashKey(bq, isHtmlBlock: true);
-     _htmlBlocks[key] = bq;
-
-     return "\n\n" + key + "\n\n";
- }
-
- private string BlockQuoteEvaluator2(Match match) // removes the line-leading indent that BlockQuoteEvaluator() added, for the matched span only
- {
- return Regex.Replace(match.Groups[1].Value, @"^ ", "", RegexOptions.Multiline);
- }
-
- private const string _charInsideUrl = @"[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;" + "\x1a]"; // characters allowed inside a bare URL (includes \x1a, the first character of hash keys)
- private const string _charEndingUrl = "[-A-Z0-9+&@#/%=~_|\\[\\])]"; // characters a bare URL may end with
-
- private static Regex _autolinkBare = new Regex(@"(<|="")?\b(https?|ftp)(://" + _charInsideUrl + "*" + _charEndingUrl + ")(?=$|\\W)",
- RegexOptions.IgnoreCase | RegexOptions.Compiled); // $1 = optional < or =" prefix, $2 = protocol, $3 = rest of the URL
-
- private static Regex _endCharRegex = new Regex(_charEndingUrl, RegexOptions.IgnoreCase | RegexOptions.Compiled); // tests a single character against _charEndingUrl
-
- private static string handleTrailingParens(Match match) // wraps a bare URL in < >, moving unbalanced trailing ")" characters outside the brackets
- {
- // The first group is essentially a negative lookbehind -- if there's a < or a =", we don't touch this.
- // We're not using a *real* lookbehind, because of links within links (a URL that contains http://www.google.com inside it).
- // With a real lookbehind, the full link would never be matched, and thus the http://www.google.com *would* be matched.
- // With the simulated lookbehind, the full link *is* matched (just not handled, because of this early return), causing
- // the google link to not be matched again.
- if (match.Groups[1].Success)
- return match.Value;
-
- var protocol = match.Groups[2].Value;
- var link = match.Groups[3].Value;
- if (!link.EndsWith(")")) // no trailing paren: nothing to balance
- return "<" + protocol + link + ">";
- var level = 0; // running paren depth; ends negative when there are surplus ")" characters
- foreach (Match c in Regex.Matches(link, "[()]"))
- {
- if (c.Value == "(")
- {
- if (level <= 0) // an opening paren resets any deficit left by earlier stray ")"
- level = 1;
- else
- level++;
- }
- else
- {
- level--;
- }
- }
- var tail = ""; // text moved out of the link, emitted after the closing >
- if (level < 0)
- {
- link = Regex.Replace(link, @"\){1," + (-level) + "}$", m => { tail = m.Value; return ""; }); // strip at most -level trailing ")" into tail
- }
- if (tail.Length > 0)
- {
- var lastChar = link[link.Length - 1];
- if (!_endCharRegex.IsMatch(lastChar.ToString())) // new last character is not a valid URL ending: move it to the tail too
- {
- tail = lastChar + tail;
- link = link.Substring(0, link.Length - 1);
- }
- }
- return "<" + protocol + link + ">" + tail;
- }
-
- /// <summary>
- /// Turn angle-delimited URLs into HTML anchor tags
- /// </summary>
- /// <remarks>
- /// &lt;http://www.example.com&gt;
- /// </remarks>
- private string DoAutoLinks(string text)
- {
-
- if (_autoHyperlink)
- {
- // fixup arbitrary URLs by adding Markdown < > so they get linked as well
- // note that at this point, all other URL in the text are already hyperlinked as <a/>
- // *except* for the <http://www.foo.com> case
- text = _autolinkBare.Replace(text, handleTrailingParens);
- }
-
- // Hyperlinks:
- text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator));
-
- if (_linkEmails)
- {
- // Email addresses:
- string pattern =
- @"<
- (?:mailto:)?
- (
- [-.\w]+
- \@
- [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
- )
- >";
- text = Regex.Replace(text, pattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
- }
-
- return text;
- }
-
- // Builds the anchor for an angle-delimited URL: the href is the encoded and
- // escaped URL, and the visible text is the URL as written.
- private string HyperlinkEvaluator(Match match)
- {
-     string link = match.Groups[1].Value;
-     return string.Format("<a href=\"{0}\">{1}</a>", EscapeBoldItalic(EncodeProblemUrlChars(link)), link);
- }
-
/// <summary>
/// Match evaluator that renders an e-mail address as an obfuscated mailto link.
/// </summary>
/// <remarks>
/// Input: an e-mail address, e.g. "foo@example.com".
/// Output: a mailto anchor in which the characters of the address are written
/// as decimal or hex character references (see <c>EncodeEmailAddress</c>), in
/// the hope of foiling most address-harvesting spam bots.
/// Based on a filter by Matthew Wickline, posted to the BBEdit-Talk mailing list.
/// </remarks>
/// <param name="match">A match whose first capture group holds the address.</param>
/// <returns>The anchor markup for the address.</returns>
private string EmailEvaluator(Match match)
{
    string email = Unescape(match.Groups[1].Value);

    email = "mailto:" + email;

    // EncodeEmailAddress leaves ':' alone, which lets the regex below find
    // the end of the "mailto:" prefix.
    email = EncodeEmailAddress(email);

    // The encoded address is used twice: as the href and as the visible text.
    email = string.Format("<a href=\"{0}\">{0}</a>", email);

    // Strip the (encoded) "mailto:" from the visible part only: everything
    // from the end of the opening tag up to the first ':' is removed.
    email = Regex.Replace(email, "\">.+?:", "\">");
    return email;
}
-
-
// Matches between one and _tabWidth spaces at the start of each line.
private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled);

/// <summary>
/// Removes one level of leading indentation (up to _tabWidth spaces) from every line.
/// </summary>
/// <param name="block">The indented text.</param>
/// <returns>The text with one indentation level stripped from each line.</returns>
private string Outdent(string block)
{
    string outdented = _outDent.Replace(block, string.Empty);
    return outdented;
}
-
-
- #region Encoding and Normalization
-
-
/// <summary>
/// Encodes an e-mail address character by character, choosing at random
/// between the raw character, a hex character reference and a decimal
/// character reference (roughly 10% raw, 45% hex, 45% decimal).
/// </summary>
/// <remarks>
/// '@' is always encoded and ':' never is.
/// </remarks>
/// <param name="addr">The address to obfuscate, e.g. "mailto:foo@example.com".</param>
/// <returns>The obfuscated address; decoding its character references yields <paramref name="addr"/>.</returns>
private string EncodeEmailAddress(string addr)
{
    var sb = new StringBuilder(addr.Length * 5);
    var rand = new Random();

    foreach (char c in addr)
    {
        int r = rand.Next(1, 100); // 1..99 inclusive

        if ((r > 90 || c == ':') && c != '@')
        {
            // Left as-is.
            sb.Append(c);
        }
        else if (r < 45)
        {
            // Hex character reference, e.g. &#x6d; (the "&#x" prefix is required
            // for the output to be a valid reference).
            sb.AppendFormat("&#x{0:x};", (int)c);
        }
        else
        {
            // Decimal character reference, e.g. &#109;
            sb.AppendFormat("&#{0};", (int)c);
        }
    }

    return sb.ToString();
}
-
// Single characters that are rewritten inside code: & < > \ * _ { } [ ]
private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);

/// <summary>
/// Encodes or escapes the characters matched by _codeEncoder inside code
/// blocks and spans, where they must be treated as literals.
/// </summary>
/// <param name="code">The raw contents of a code block or span.</param>
/// <returns>The code with each matched character replaced by EncodeCodeEvaluator.</returns>
private string EncodeCode(string code)
{
    MatchEvaluator encodeOne = EncodeCodeEvaluator;
    return _codeEncoder.Replace(code, encodeOne);
}
/// <summary>
/// Match evaluator for <c>EncodeCode</c>: maps one special character to its replacement.
/// </summary>
/// <param name="match">A single-character match produced by _codeEncoder.</param>
/// <returns>
/// The HTML entity for '&amp;', '&lt;' and '&gt;'; for every other matched
/// character, its entry in _escapeTable.
/// </returns>
private string EncodeCodeEvaluator(Match match)
{
    switch (match.Value)
    {
        // Encode all ampersands; HTML entities are not
        // entities within a Markdown code span.
        case "&":
            return "&amp;";
        // Angle brackets must not be interpreted as markup.
        case "<":
            return "&lt;";
        case ">":
            return "&gt;";
        // Characters that are magic in Markdown are swapped for their hash.
        default:
            return _escapeTable[match.Value];
    }
}
-
-
// An ampersand that does not start a numeric or named character reference.
private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
// A '<' that is not followed by a letter, '/', '?', '$' or '!'.
private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Encodes ampersands that are not part of a character reference as
/// <c>&amp;amp;</c>, and left angle brackets that are not matched by the
/// tag-start exclusion in _angles as <c>&amp;lt;</c>.
/// </summary>
/// <param name="s">The text to encode.</param>
/// <returns>The encoded text; existing character references are left untouched.</returns>
private string EncodeAmpsAndAngles(string s)
{
    // The replacement must be the entity itself; replacing a character with
    // the same character would leave the text unencoded.
    s = _amps.Replace(s, "&amp;");
    s = _angles.Replace(s, "&lt;");
    return s;
}
-
// Alternation of every backslash-escaped character; assigned where the
// escape tables are populated.
private static Regex _backslashEscapes;

/// <summary>
/// Replaces backslash-escaped characters such as \`, \*, \[ with their
/// entries in _backslashEscapeTable.
/// </summary>
/// <param name="s">The text to process.</param>
/// <returns>The text with every recognised escape sequence replaced.</returns>
private string EscapeBackslashes(string s)
{
    MatchEvaluator lookup = EscapeBackslashesEvaluator;
    return _backslashEscapes.Replace(s, lookup);
}

/// <summary>
/// Match evaluator for <c>EscapeBackslashes</c>: looks the matched sequence up in _backslashEscapeTable.
/// </summary>
private string EscapeBackslashesEvaluator(Match match)
{
    string escapeSequence = match.Value;
    return _backslashEscapeTable[escapeSequence];
}
-
// A placeholder token: the 0x1A control character, 'E', one or more digits, 'E'.
// The literal is split so that the 'E' is not read as part of the \x escape.
private static Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled);

/// <summary>
/// Swaps every placeholder token back to the special character it stands for.
/// </summary>
/// <param name="s">Text that may contain placeholder tokens.</param>
/// <returns>The text with each token replaced by its entry in _invertedEscapeTable.</returns>
private string Unescape(string s)
{
    MatchEvaluator restore = UnescapeEvaluator;
    return _unescapes.Replace(s, restore);
}

/// <summary>
/// Match evaluator for <c>Unescape</c>: looks the matched token up in _invertedEscapeTable.
/// </summary>
private string UnescapeEvaluator(Match match)
{
    string token = match.Value;
    return _invertedEscapeTable[token];
}
-
-
/// <summary>
/// Replaces the bold marker '*' and the italic marker '_' with their
/// entries in _escapeTable.
/// </summary>
/// <param name="s">The text to escape.</param>
/// <returns>The text with every '*' and '_' replaced.</returns>
private string EscapeBoldItalic(string s)
{
    string withoutAsterisks = s.Replace("*", _escapeTable["*"]);
    return withoutAsterisks.Replace("_", _escapeTable["_"]);
}
-
/// <summary>
/// Encodes the characters '&gt;', '&lt;' and '"' as HTML entities so that a
/// string can be placed inside a double-quoted attribute value.
/// </summary>
/// <remarks>
/// Ampersands are left untouched, so entities already present in the input
/// are not double-encoded.
/// </remarks>
/// <param name="s">The raw attribute value.</param>
/// <returns>The value with angle brackets and double quotes replaced by entities.</returns>
private static string AttributeEncode(string s)
{
    return s.Replace(">", "&gt;").Replace("<", "&lt;").Replace("\"", "&quot;");
}
-
// Characters that get percent-encoded inside URLs: " ' * ( ) [ ] $ :
private static readonly char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

/// <summary>
/// Hex-encodes some unusual "problem" characters in a URL to avoid URL
/// detection problems. Returns the URL unchanged when
/// _encodeProblemUrlCharacters is off.
/// </summary>
/// <remarks>
/// A ':' is kept as-is when the next character is '/' or a decimal digit;
/// a ':' in the final position is encoded.
/// </remarks>
/// <param name="url">The URL to encode.</param>
/// <returns>The URL with each problem character written as '%' plus lowercase hex.</returns>
private string EncodeProblemUrlChars(string url)
{
    if (!_encodeProblemUrlCharacters)
    {
        return url;
    }

    var encoded = new StringBuilder(url.Length);
    int lastIndex = url.Length - 1;

    for (int pos = 0; pos < url.Length; pos++)
    {
        char current = url[pos];
        bool mustEncode = Array.IndexOf(_problemUrlChars, current) != -1;

        if (mustEncode && current == ':' && pos < lastIndex)
        {
            char next = url[pos + 1];
            bool followedBySlash = next == '/';
            bool followedByDigit = next >= '0' && next <= '9';
            mustEncode = !followedBySlash && !followedByDigit;
        }

        if (mustEncode)
        {
            encoded.Append("%").AppendFormat("{0:x}", (byte)current);
        }
        else
        {
            encoded.Append(current);
        }
    }

    return encoded.ToString();
}
-
-
/// <summary>
/// Within tags -- meaning between &lt; and &gt; -- encodes [\ ` * _] so they
/// don't conflict with their use in Markdown for code, italics and strong.
/// Each such character is replaced with its corresponding hash value; this is
/// likely overkill, but it should prevent collisions with the escape values
/// by accident.
/// </summary>
/// <param name="text">Text that may contain HTML tags.</param>
/// <returns>The text rebuilt from its tokens, with tag tokens escaped.</returns>
private string EscapeSpecialCharsWithinTagAttributes(string text)
{
    var tokens = TokenizeHTML(text);

    // Rebuild the text from the tokens; only tag tokens are modified.
    var sb = new StringBuilder(text.Length);

    foreach (var token in tokens)
    {
        string value = token.Value;

        if (token.Type == TokenType.Tag)
        {
            value = value.Replace(@"\", _escapeTable[@"\"]);

            // NOTE(review): this statement and the next were fused into one
            // invalid StartsWith call in the reviewed text; both have been
            // reconstructed -- confirm against the upstream file.
            // Tags starting with "<!" have their slashes hidden so that, with
            // auto-hyperlinking on, URLs inside them are not picked up.
            if (_autoHyperlink && value.StartsWith("<!", StringComparison.Ordinal))
            {
                value = value.Replace("/", _escapeTable["/"]);
            }

            // A <code> or </code> tag with at least one character on each side
            // is replaced by the hash that stands for a backtick.
            value = Regex.Replace(value, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);
            value = EscapeBoldItalic(value);
        }

        sb.Append(value);
    }

    return sb.ToString();
}
-
/// <summary>
/// Normalizes raw input before processing:
/// expands each tab to the next multiple of _tabWidth columns;
/// converts DOS (CR LF) and Mac (CR) line endings to UNIX (LF);
/// drops every 0x1A character;
/// empties lines that consist only of spaces (the line break is kept);
/// and terminates the result with extra newlines.
/// </summary>
/// <param name="text">The raw text.</param>
/// <returns>The normalized text.</returns>
private string Normalize(string text)
{
    var result = new StringBuilder(text.Length);
    var currentLine = new StringBuilder();
    bool hasContent = false; // true once the current line holds a character other than a space or expanded tab
    int lastIndex = text.Length - 1;

    for (int pos = 0; pos < text.Length; pos++)
    {
        char ch = text[pos];

        // LF always ends a line. CR ends one only when it is neither the last
        // character nor the first half of a CR LF pair.
        bool endsLine = ch == '\n'
            || (ch == '\r' && pos < lastIndex && text[pos + 1] != '\n');

        if (endsLine)
        {
            if (hasContent)
            {
                result.Append(currentLine);
            }
            result.Append('\n');
            currentLine.Length = 0;
            hasContent = false;
        }
        else if (ch == '\r' || ch == '\x1A')
        {
            // Remaining CRs (before LF, or at the very end) and 0x1A are dropped.
        }
        else if (ch == '\t')
        {
            // Pad with spaces up to the next tab stop.
            for (int pad = _tabWidth - currentLine.Length % _tabWidth; pad > 0; pad--)
            {
                currentLine.Append(' ');
            }
        }
        else
        {
            if (ch != ' ')
            {
                hasContent = true;
            }
            currentLine.Append(ch);
        }
    }

    // Flush whatever follows the last line break.
    if (hasContent)
    {
        result.Append(currentLine);
    }
    result.Append('\n');

    // Two more newlines are added before returning.
    result.Append("\n\n");
    return result.ToString();
}
-
- #endregion
-
/// <summary>
/// Concatenates <paramref name="count"/> copies of <paramref name="text"/>;
/// this emulates the string-repeat function available in PHP.
/// </summary>
/// <param name="text">The string to repeat.</param>
/// <param name="count">How many copies to produce.</param>
/// <returns>The repeated string; empty when <paramref name="count"/> is zero.</returns>
private static string RepeatString(string text, int count)
{
    var repeated = new StringBuilder(text.Length * count);

    int remaining = count;
    while (remaining > 0)
    {
        repeated.Append(text);
        remaining--;
    }

    return repeated.ToString();
}
-
- }
-}
\ No newline at end of file