();
+
+ string backslashPattern = "";
+
+ foreach (char c in @"\`*_{}[]()>#+-.!/")
+ {
+ string key = c.ToString();
+ string hash = GetHashKey(key, isHtmlBlock: false);
+ _escapeTable.Add(key, hash);
+ _invertedEscapeTable.Add(hash, key);
+ _backslashEscapeTable.Add(@"\" + key, hash);
+ backslashPattern += Regex.Escape(@"\" + key) + "|";
+ }
+
+ _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled);
+ }
+
+ /// <summary>
+ /// Gets the version string of this MarkdownSharp build;
+ /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute
+ /// </summary>
+ public string Version
+ {
+     get
+     {
+         return _version;
+     }
+ }
+
+ /// <summary>
+ /// Converts the supplied Markdown-formatted text to HTML;
+ /// see http://en.wikipedia.org/wiki/Markdown
+ /// </summary>
+ /// <remarks>
+ /// The order of the processing stages is essential: the input is normalized,
+ /// raw HTML blocks are hashed, link definitions are stripped, the block gamut
+ /// runs, and Unescape() is applied last. An empty or null input yields "".
+ /// </remarks>
+ public string Transform(string text)
+ {
+     if (String.IsNullOrEmpty(text))
+     {
+         return "";
+     }
+
+     Setup();
+
+     string html = Normalize(text);
+     html = StripLinkDefinitions(HashHTMLBlocks(html));
+     html = Unescape(RunBlockGamut(html));
+
+     Cleanup();
+
+     return html + "\n";
+ }
+
+
+ /// <summary>
+ /// Applies the block-level transformations: headers, horizontal rules, lists,
+ /// code blocks, block quotes and finally paragraphs.
+ /// </summary>
+ private string RunBlockGamut(string text, bool unhash = true)
+ {
+     string blocks = DoHeaders(text);
+     blocks = DoHorizontalRules(blocks);
+     blocks = DoLists(blocks);
+     blocks = DoCodeBlocks(blocks);
+     blocks = DoBlockQuotes(blocks);
+
+     // HashHTMLBlocks() already ran once on the raw Markdown source to protect
+     // HTML written by the author. This second pass hashes the markup produced
+     // by the steps above, so FormParagraphs() does not wrap paragraph tags
+     // around block-level tags.
+     blocks = HashHTMLBlocks(blocks);
+
+     return FormParagraphs(blocks, unhash: unhash);
+ }
+
+
+ /// <summary>
+ /// Applies the span-level transformations that occur *within* block-level
+ /// elements such as paragraphs, headers and list items.
+ /// </summary>
+ private string RunSpanGamut(string text)
+ {
+     string span = DoCodeSpans(text);
+     span = EscapeSpecialCharsWithinTagAttributes(span);
+     span = EscapeBackslashes(span);
+
+     // Images are handled before anchors, because ![foo][f] looks like an anchor.
+     span = DoAnchors(DoImages(span));
+
+     // Auto-links run after DoAnchors(), because < and > may be used as
+     // delimiters around the URL of an inline link.
+     span = DoAutoLinks(span);
+
+     // put back the "://" sequences that SaveFromAutoLinking() masked
+     span = span.Replace(AutoLinkPreventionMarker, "://");
+
+     span = EncodeAmpsAndAngles(span);
+     span = DoItalicsAndBold(span);
+     return DoHardBreaks(span);
+ }
+
+ private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
+ private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
+ private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);
+
+ private static Regex _htmlBlockHash = new Regex("\x1AH\\d+H", RegexOptions.Compiled);
+
+ /// <summary>
+ /// Splits the text on two or more newlines to form "paragraphs";
+ /// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off)
+ /// or wrapped in an HTML p tag.
+ /// </summary>
+ /// <param name="text">Block-processed text whose HTML blocks have been replaced by hash keys.</param>
+ /// <param name="unhash">When false, paragraphs that start with an HTML-block hash are left untouched.</param>
+ /// <returns>The paragraphs joined by a blank line.</returns>
+ private string FormParagraphs(string text, bool unhash = true)
+ {
+     // trim leading/trailing newlines, then split on two or more newlines
+     string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));
+
+     for (int i = 0; i < grafs.Length; i++)
+     {
+         // Ordinal comparison is required here: a culture-sensitive StartsWith may
+         // treat the U+001A control character as ignorable, in which case every
+         // paragraph beginning with "H" would be mistaken for a hash key.
+         if (grafs[i].StartsWith("\x1AH", StringComparison.Ordinal))
+         {
+             if (!unhash)
+                 continue;
+
+             // unhashify HTML blocks; a stored block may itself contain hashes,
+             // so repeat until a pass makes no replacement
+             int sanityCheck = 50; // just for safety, guard against an infinite loop
+             bool keepGoing = true; // as long as replacements were made, keep going
+             while (keepGoing && sanityCheck > 0)
+             {
+                 keepGoing = false;
+                 grafs[i] = _htmlBlockHash.Replace(grafs[i], match =>
+                 {
+                     keepGoing = true;
+                     return _htmlBlocks[match.Value];
+                 });
+                 sanityCheck--;
+             }
+             // If keepGoing is still true here the guard tripped; such an input
+             // should be reported at http://code.google.com/p/markdownsharp/
+         }
+         else
+         {
+             // do span level processing inside the block, then wrap result in <p> tags
+             grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
+         }
+     }
+
+     return string.Join("\n\n", grafs);
+ }
+
+
+ /// <summary>
+ /// Resets the per-document state (URL, title and HTML-block tables and the list nesting level).
+ /// </summary>
+ private void Setup()
+ {
+     // Without this reset, entries left over from a previous document would
+     // conflict with the current one when several articles are rendered with
+     // the same instance (e.g. an index page showing the N most recent articles).
+     _listLevel = 0;
+     _htmlBlocks.Clear();
+     _titles.Clear();
+     _urls.Clear();
+ }
+
+ /// <summary>
+ /// Releases the per-document state after a transform; identical to <see cref="Setup"/>.
+ /// </summary>
+ private void Cleanup()
+ {
+     // the same tables are cleared before and after each transform
+     Setup();
+ }
+
+ private static string _nestedBracketsPattern;
+
+ /// <summary>
+ /// Returns the cached, reusable pattern that matches balanced [brackets]. See Friedl's
+ /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+ /// </summary>
+ private static string GetNestedBracketsPattern()
+ {
+     // matches [this], [this[also]] and [this[also[too]]],
+     // nested up to _nestDepth levels
+     if (_nestedBracketsPattern != null)
+     {
+         return _nestedBracketsPattern;
+     }
+
+     string opening = RepeatString(@"
+ (?> # Atomic matching
+ [^\[\]]+ # Anything other than brackets
+ |
+ \[
+ ", _nestDepth);
+     string closing = RepeatString(
+ @" \]
+ )*"
+ , _nestDepth);
+
+     _nestedBracketsPattern = opening + closing;
+     return _nestedBracketsPattern;
+ }
+
+ private static string _nestedParensPattern;
+
+ /// <summary>
+ /// Returns the cached, reusable pattern that matches balanced (parens). See Friedl's
+ /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+ /// </summary>
+ private static string GetNestedParensPattern()
+ {
+     // matches (this), (this(also)) and (this(also(too))),
+     // nested up to _nestDepth levels
+     if (_nestedParensPattern != null)
+     {
+         return _nestedParensPattern;
+     }
+
+     string opening = RepeatString(@"
+ (?> # Atomic matching
+ [^()\s]+ # Anything other than parens or whitespace
+ |
+ \(
+ ", _nestDepth);
+     string closing = RepeatString(
+ @" \)
+ )*"
+ , _nestDepth);
+
+     _nestedParensPattern = opening + closing;
+     return _nestedParensPattern;
+ }
+
+ // Matches a link definition line: up to (_tabWidth - 1) leading spaces, "[id]:",
+ // a URL optionally wrapped in angle brackets, and an optional quoted or
+ // parenthesised title. The leading "<?" before the URL group keeps an opening
+ // angle bracket out of the captured URL (the trailing ">?" was already present).
+ private static Regex _linkDef = new Regex(string.Format(@"
+ ^[ ]{{0,{0}}}\[([^\[\]]+)\]: # id = $1
+ [ ]*
+ \n? # maybe *one* newline
+ [ ]*
+ <?(\S+?)>? # url = $2
+ [ ]*
+ \n? # maybe one newline
+ [ ]*
+ (?:
+ (?<=\s) # lookbehind for whitespace
+ [""(]
+ (.+?) # title = $3
+ ["")]
+ [ ]*
+ )? # title is optional
+ (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Strips link definitions from text, stores the URLs and titles in hash references.
+ /// </summary>
+ /// <remarks>
+ /// ^[id]: url "optional title"
+ /// </remarks>
+ /// <returns>The text with every matched definition removed.</returns>
+ private string StripLinkDefinitions(string text)
+ {
+     return _linkDef.Replace(text, new MatchEvaluator(LinkEvaluator));
+ }
+
+ /// <summary>
+ /// Records one link definition: stores the encoded URL (and the title, when present)
+ /// under the lower-cased link id, and removes the definition from the text.
+ /// </summary>
+ /// <param name="match">A match of the link-definition pattern (id = $1, url = $2, title = $3).</param>
+ /// <returns>Always the empty string, so the definition disappears from the output.</returns>
+ private string LinkEvaluator(Match match)
+ {
+     string linkID = match.Groups[1].Value.ToLowerInvariant();
+     _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);
+
+     // Groups[n] is never null; an unmatched optional group simply has Length 0
+     Group title = match.Groups[3];
+     if (title.Length > 0)
+         _titles[linkID] = title.Value.Replace("\"", "&quot;"); // keep the title safe inside a quoted attribute
+
+     return "";
+ }
+
+ // Deliberately not RegexOptions.Compiled: compiling this very large regex was found to perform worse.
+ private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
+
+
+ /// <summary>
+ /// Builds the pattern used to find block-level HTML; derived pretty much verbatim from PHP Markdown
+ /// </summary>
+ private static string GetBlockPattern()
+ {
+
+ // Hashify HTML blocks:
+ // We only want to do this for block-level HTML tags, such as headers,
+ // lists, and tables. That's because we still want to wrap <p>s around
+ // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ // phrase emphasis, and spans. The list of tags we're looking for is
+ // hard-coded:
+ //
+ // * List "a" is made of tags which can be both inline or block-level.
+ // These will be treated block-level when the start tag is alone on
+ // its line, otherwise they're not matched here and will be taken as
+ // inline later.
+ // * List "b" is made of tags which are always block-level;
+ //
+ string blockTagsA = "ins|del";
+ string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";
+
+ // Regular expression for the content of a block tag.
+ string attr = @"
+ (?> # optional tag attributes
+ \s # starts with whitespace
+ (?>
+ [^>""/]+ # text outside quotes
+ |
+ /+(?!>) # slash not followed by >
+ |
+ ""[^""]*"" # text inside double quotes (tolerate >)
+ |
+ '[^']*' # text inside single quotes (tolerate >)
+ )*
+ )?
+ ";
+
+ string content = RepeatString(@"
+ (?>
+ [^<]+ # content without tag
+ |
+ <\2 # nested opening tag
+ " + attr + @" # attributes
+ (?>
+ />
+ |
+ >", _nestDepth) + // end of opening tag
+ ".*?" + // last level nested tag content
+ RepeatString(@"
+ \2\s*> # closing nested tag
+ )
+ |
+ <(?!/\2\s*> # other tags with a different name
+ )
+ )*", _nestDepth);
+
+ string content2 = content.Replace(@"\2", @"\3"); // same content pattern, keyed on group 3 (the list "a" tag name)
+
+ // First, look for nested blocks, e.g.:
+ // <div>
+ //     <div>
+ //     tags for inner block must be indented.
+ //     </div>
+ // </div>
+ //
+ // The outermost tags must start at the left margin for this to match, and
+ // the inner nested divs must be indented.
+ // We need to do this before the next, more liberal match, because the next
+ // match will start at the first `` and stop at the first `
`.
+ string pattern = @"
+ (?>
+ (?>
+ (?<=\n) # Starting at the beginning of a line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+
+ # Match from `\n` to `\n`, handling nested tags
+ # in between.
+
+ <($block_tags_b_re) # start tag = $2
+ $attr> # attributes followed by > and \n
+ $content # content, support nesting
+ \2> # the matching end tag
+ [ ]* # trailing spaces
+ (?=\n+|\Z) # followed by a newline or end of document
+
+ | # Special version for tags of group a.
+
+ <($block_tags_a_re) # start tag = $3
+ $attr>[ ]*\n # attributes followed by >
+ $content2 # content, support nesting
+ \3> # the matching end tag
+ [ ]* # trailing spaces
+ (?=\n+|\Z) # followed by a newline or end of document
+
+ | # Special case just for
. It was easier to make a special
+ # case than to make the other regex more complicated.
+
+ [ ]{0,$less_than_tab}
+
# the matching end tag
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ | # Special case for standalone HTML comments:
+
+ (?<=\n\n|\A) # preceded by a blank line or start of document
+ [ ]{0,$less_than_tab}
+ (?s:
+
+ )
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ | # PHP and ASP-style processor instructions ( and <%)
+
+ [ ]{0,$less_than_tab}
+ (?s:
+ <([?%]) # $4
+ .*?
+ \4>
+ )
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ )
+ )"; // NOTE(review): closing tags ("\2>", "\3>"), the hr special case and the comment alternative look like markup was stripped from this literal - restore from the upstream source before relying on it
+
+ pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString());
+ pattern = pattern.Replace("$block_tags_b_re", blockTagsB);
+ pattern = pattern.Replace("$block_tags_a_re", blockTagsA);
+ pattern = pattern.Replace("$attr", attr);
+ pattern = pattern.Replace("$content2", content2); // must run before "$content", which is a prefix of "$content2"
+ pattern = pattern.Replace("$content", content);
+
+ return pattern;
+ }
+
+ /// <summary>
+ /// Swaps every block-level HTML block found in the text for a hash entry.
+ /// </summary>
+ private string HashHTMLBlocks(string text)
+ {
+     MatchEvaluator hasher = HtmlEvaluator;
+     return _blocksHtml.Replace(text, hasher);
+ }
+
+ /// <summary>
+ /// Stores the matched HTML block under its hash key and returns the key
+ /// surrounded by blank lines.
+ /// </summary>
+ private string HtmlEvaluator(Match match)
+ {
+     string block = match.Groups[1].Value;
+     string hashKey = GetHashKey(block, isHtmlBlock: true);
+
+     _htmlBlocks[hashKey] = block;
+
+     return "\n\n" + hashKey + "\n\n";
+ }
+
+ /// <summary>
+ /// Builds the placeholder key for a string: U+001A, a delimiter letter, the absolute
+ /// value of the string's hash code in decimal, and the delimiter letter again.
+ /// </summary>
+ /// <param name="s">The text to derive the key from.</param>
+ /// <param name="isHtmlBlock">true selects the 'H' delimiter, false the 'E' delimiter.</param>
+ /// <returns>The key, e.g. "\x1AH12345H".</returns>
+ private static string GetHashKey(string s, bool isHtmlBlock)
+ {
+     var delim = isHtmlBlock ? 'H' : 'E';
+     // Widen to long first: Math.Abs(int) throws OverflowException for int.MinValue,
+     // which is a legal hash code. All other values produce the same digits as before.
+     long magnitude = Math.Abs((long)s.GetHashCode());
+     return "\x1A" + delim + magnitude.ToString() + delim;
+ }
+
+ private static Regex _htmlTokens = new Regex(@"
+ ()| # match
+ (<\?.*?\?>)| # match " +
+ RepeatString(@"
+ (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
+ " # match and ",
+ RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // NOTE(review): the first alternative "()" is empty and therefore matches at every position; it presumably held an HTML-comment pattern that was stripped - confirm against upstream
+
+ /// <summary>
+ /// Splits the input into a list of tokens. Each token is either a tag (as matched
+ /// by the HTML token pattern, possibly containing nested tags) or a run of text
+ /// between tags; tokens appear in document order and together cover the input.
+ /// </summary>
+ /// <param name="text">The text to tokenize.</param>
+ /// <returns>Tokens of type TokenType.Tag or TokenType.Text carrying the matched value.</returns>
+ private List<Token> TokenizeHTML(string text)
+ {
+     int pos = 0; // index just past the previously emitted token
+     var tokens = new List<Token>();
+
+     // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
+     // http://www.bradchoate.com/past/mtregex.php
+     foreach (Match m in _htmlTokens.Matches(text))
+     {
+         int tagStart = m.Index;
+
+         // text between the previous tag and this one
+         if (pos < tagStart)
+             tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos)));
+
+         tokens.Add(new Token(TokenType.Tag, m.Value));
+         pos = tagStart + m.Length;
+     }
+
+     // trailing text after the last tag
+     if (pos < text.Length)
+         tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos)));
+
+     return tokens;
+ }
+
+
+ private static Regex _anchorRef = new Regex(string.Format(@"
+ ( # wrap whole match in $1
+ \[
+ ({0}) # link text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+ )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ private static Regex _anchorInline = new Regex(string.Format(@"
+ ( # wrap whole match in $1
+ \[
+ ({0}) # link text = $2
+ \]
+ \( # literal paren
+ [ ]*
+ ({1}) # href = $3
+ [ ]*
+ ( # $4
+ (['""]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ ]* # ignore any spaces between closing quote and )
+ )? # title is optional
+ \)
+ )", GetNestedBracketsPattern(), GetNestedParensPattern()),
+ RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ private static Regex _anchorRefShortcut = new Regex(@"
+ ( # wrap whole match in $1
+ \[
+ ([^\[\]]+) # link text = $2; can't contain [ or ]
+ \]
+ )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Converts the three Markdown link forms into HTML anchor tags
+ /// </summary>
+ /// <remarks>
+ /// [link text](url "title")
+ /// [link text][id]
+ /// [id]
+ /// </remarks>
+ private string DoAnchors(string text)
+ {
+     // 1. reference-style links: [link text] [id]
+     string linked = _anchorRef.Replace(text, AnchorRefEvaluator);
+
+     // 2. inline-style links: [link text](url "optional title"), where the url
+     //    may also be written between angle brackets
+     linked = _anchorInline.Replace(linked, AnchorInlineEvaluator);
+
+     // 3. reference-style shortcuts: [link text]
+     //    These run last so they cannot swallow [link test][1] or [link test](/foo)
+     return _anchorRefShortcut.Replace(linked, AnchorRefShortcutEvaluator);
+ }
+
+ /// <summary>
+ /// Masks every "://" in the string with AutoLinkPreventionMarker so the
+ /// auto-link pass does not treat it as a URL.
+ /// </summary>
+ private string SaveFromAutoLinking(string s)
+ {
+     string masked = s.Replace("://", AutoLinkPreventionMarker);
+     return masked;
+ }
+
+ /// <summary>
+ /// Builds the anchor tag for a reference-style link ([link text][id]).
+ /// </summary>
+ /// <param name="match">Match with whole text = $1, link text = $2 and id = $3.</param>
+ /// <returns>
+ /// The anchor tag when the id has a stored URL; otherwise the matched text unchanged.
+ /// </returns>
+ private string AnchorRefEvaluator(Match match)
+ {
+     string wholeMatch = match.Groups[1].Value;
+     string linkText = SaveFromAutoLinking(match.Groups[2].Value);
+     string linkID = match.Groups[3].Value.ToLowerInvariant();
+
+     // for shortcut links like [this][].
+     if (linkID == "")
+         linkID = linkText.ToLowerInvariant();
+
+     // If there's no such link ID, leave intact
+     if (!_urls.ContainsKey(linkID))
+         return wholeMatch;
+
+     string url = _urls[linkID];
+     url = EncodeProblemUrlChars(url);
+     url = EscapeBoldItalic(url);
+
+     // The previous code computed url and then returned the bare link text;
+     // emit the anchor the url was prepared for.
+     string result = "<a href=\"" + url + "\"";
+
+     if (_titles.ContainsKey(linkID))
+     {
+         // stored titles already have their double quotes replaced by LinkEvaluator
+         // NOTE(review): confirm whether further attribute encoding is expected here
+         result += " title=\"" + EscapeBoldItalic(_titles[linkID]) + "\"";
+     }
+
+     return result + ">" + linkText + "</a>";
+ }
+
+ /// <summary>
+ /// Builds the anchor tag for a reference-style shortcut link ([link text]).
+ /// </summary>
+ /// <param name="match">Match with whole text = $1 and link text = $2.</param>
+ /// <returns>
+ /// The anchor tag when the link text names a stored URL; otherwise the matched text unchanged.
+ /// </returns>
+ private string AnchorRefShortcutEvaluator(Match match)
+ {
+     string wholeMatch = match.Groups[1].Value;
+     string linkText = SaveFromAutoLinking(match.Groups[2].Value);
+     // lower case and remove newlines / extra spaces
+     string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");
+
+     // If there's no such link ID, leave intact
+     if (!_urls.ContainsKey(linkID))
+         return wholeMatch;
+
+     string url = _urls[linkID];
+     url = EncodeProblemUrlChars(url);
+     url = EscapeBoldItalic(url);
+
+     // The previous code computed url and then returned the bare link text;
+     // emit the anchor the url was prepared for.
+     string result = "<a href=\"" + url + "\"";
+
+     if (_titles.ContainsKey(linkID))
+     {
+         // stored titles already have their double quotes replaced by LinkEvaluator
+         // NOTE(review): confirm whether further attribute encoding is expected here
+         result += " title=\"" + EscapeBoldItalic(_titles[linkID]) + "\"";
+     }
+
+     return result + ">" + linkText + "</a>";
+ }
+
+
+ /// <summary>
+ /// Builds the anchor tag for an inline link ([link text](url "optional title")).
+ /// </summary>
+ /// <param name="match">Match with link text = $2, href = $3 and title = $6.</param>
+ /// <returns>The anchor tag, with a title attribute when a title was given.</returns>
+ private string AnchorInlineEvaluator(Match match)
+ {
+     string linkText = SaveFromAutoLinking(match.Groups[2].Value);
+     string url = match.Groups[3].Value;
+     string title = match.Groups[6].Value;
+
+     url = EncodeProblemUrlChars(url);
+     url = EscapeBoldItalic(url);
+     if (url.StartsWith("<", StringComparison.Ordinal) && url.EndsWith(">", StringComparison.Ordinal))
+         url = url.Substring(1, url.Length - 2); // remove <>'s surrounding URL, if present
+
+     // The previous code prepared url and title and then returned only the link text;
+     // emit the anchor they were prepared for.
+     string result = string.Format("<a href=\"{0}\"", url);
+
+     if (!String.IsNullOrEmpty(title))
+     {
+         // same quote replacement LinkEvaluator applies to reference titles
+         // NOTE(review): confirm whether further attribute encoding is expected here
+         title = EscapeBoldItalic(title.Replace("\"", "&quot;"));
+         result += string.Format(" title=\"{0}\"", title);
+     }
+
+     result += string.Format(">{0}</a>", linkText);
+     return result;
+ }
+
+ private static Regex _imagesRef = new Regex(@"
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+
+ )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+
+ private static Regex _imagesInline = new Regex(String.Format(@"
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+ \s? # one optional whitespace character
+ \( # literal paren
+ [ ]*
+ ({0}) # href = $3
+ [ ]*
+ ( # $4
+ (['""]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ ]*
+ )? # title is optional
+ \)
+ )", GetNestedParensPattern()),
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Converts Markdown image shortcuts into HTML img tags.
+ /// </summary>
+ /// <remarks>
+ /// ![alt text][id]
+ /// ![alt text](url "optional title")
+ /// </remarks>
+ private string DoImages(string text)
+ {
+     // reference-style labeled images first: ![alt text][id]
+     string withRefs = _imagesRef.Replace(text, ImageReferenceEvaluator);
+
+     // then inline images: ![alt text](url "optional title")
+     // Don't forget: encode * and _
+     return _imagesInline.Replace(withRefs, ImageInlineEvaluator);
+ }
+
+ // Keeps colliding syntax ambiguities from producing horribly broken HTML.
+ // The result likely still isn't what the user meant, but at least the
+ // output is not garbage: bold/italic markers are escaped, then every
+ // bracket and paren is replaced by its entry in the escape table.
+ private string EscapeImageAltText(string s)
+ {
+     string escaped = EscapeBoldItalic(s);
+     return Regex.Replace(escaped, @"[\[\]()]", bracket => _escapeTable[bracket.ToString()]);
+ }
+
+ /// <summary>
+ /// Builds the img tag for a reference-style image (![alt text][id]), or
+ /// returns the matched text unchanged when the id has no stored URL.
+ /// </summary>
+ private string ImageReferenceEvaluator(Match match)
+ {
+     string altText = match.Groups[2].Value;
+     string linkID = match.Groups[3].Value.ToLowerInvariant();
+
+     // for shortcut links like ![this][].
+     if (linkID == "")
+     {
+         linkID = altText.ToLowerInvariant();
+     }
+
+     if (!_urls.ContainsKey(linkID))
+     {
+         // If there's no such link ID, leave intact:
+         return match.Groups[1].Value;
+     }
+
+     string title = _titles.ContainsKey(linkID) ? _titles[linkID] : null;
+     return ImageTag(_urls[linkID], altText, title);
+ }
+
+ /// <summary>
+ /// Builds the img tag for an inline image (alt = $2, href = $3, title = $6).
+ /// </summary>
+ private string ImageInlineEvaluator(Match match)
+ {
+     string href = match.Groups[3].Value;
+
+     // Remove <>'s surrounding URL, if present
+     bool bracketed = href.StartsWith("<") && href.EndsWith(">");
+     if (bracketed)
+     {
+         href = href.Substring(1, href.Length - 2);
+     }
+
+     return ImageTag(href, match.Groups[2].Value, match.Groups[6].Value);
+ }
+
+ private string ImageTag(string url, string altText, string title)
+ {
+ altText = EscapeImageAltText(AttributeEncode(altText));
+ url = EncodeProblemUrlChars(url);
+ url = EscapeBoldItalic(url);
+ var result = string.Format("
+ /// Converts Markdown headers into HTML header tags
+ ///
+ ///
+ /// Header 1
+ /// ========
+ ///
+ /// Header 2
+ /// --------
+ ///
+ /// # Header 1
+ /// ## Header 2
+ /// ## Header 2 with closing hashes ##
+ /// ...
+ /// ###### Header 6
+ ///
+ private string DoHeaders(string text)
+ {
+     // underlined (setext) headers first, then #-prefixed (atx) headers
+     string withSetext = _headerSetext.Replace(text, SetextHeaderEvaluator);
+     return _headerAtx.Replace(withSetext, AtxHeaderEvaluator);
+ }
+
+ /// <summary>
+ /// Builds the header tag for an underlined (setext) header: text = $1, underline = $2.
+ /// An underline starting with "=" gives level 1, anything else level 2.
+ /// </summary>
+ /// <returns>The span-processed text wrapped in h1 or h2 tags, followed by a blank line.</returns>
+ private string SetextHeaderEvaluator(Match match)
+ {
+     string header = match.Groups[1].Value;
+     int level = match.Groups[2].Value.StartsWith("=", StringComparison.Ordinal) ? 1 : 2;
+     // the level argument was previously passed but never used by the format string
+     return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(header), level);
+ }
+
+ /// <summary>
+ /// Builds the header tag for a #-prefixed (atx) header: marker = $1, text = $2.
+ /// The header level is the length of the marker.
+ /// </summary>
+ /// <returns>The span-processed text wrapped in the matching header tags, followed by a blank line.</returns>
+ private string AtxHeaderEvaluator(Match match)
+ {
+     string header = match.Groups[2].Value;
+     int level = match.Groups[1].Value.Length;
+     // the level argument was previously passed but never used by the format string
+     return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(header), level);
+ }
+
+
+ private static Regex _horizontalRules = new Regex(@"
+ ^[ ]{0,3} # Leading space
+ ([-*_]) # $1: First marker
+ (?> # Repeated marker group
+ [ ]{0,2} # Zero, one, or two spaces.
+ \1 # Marker character
+ ){2,} # Group repeated at least twice
+ [ ]* # Trailing spaces
+ $ # End of line.
+ ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); // a whole line of three or more identical -, * or _ markers, each optionally preceded by up to two spaces
+
+ ///
+ /// Turn Markdown horizontal rules into HTML hr tags
+ ///
+ ///
+ /// ***
+ /// * * *
+ /// ---
+ /// - - -
+ ///
+ private string DoHorizontalRules(string text)
+ {
+ return _horizontalRules.Replace(text, "
+ /// Converts Markdown lists into HTML ul and ol and li tags
+ ///
+ private string DoLists(string text, bool isInsideParagraphlessListItem = false)
+ {
+     // Nested lists use a different prefix pattern than top-level lists.
+     // See the extended comment in ProcessListItems().
+     if (_listLevel <= 0)
+     {
+         return _listTopLevel.Replace(text, GetListEvaluator(false));
+     }
+
+     return _listNested.Replace(text, GetListEvaluator(isInsideParagraphlessListItem));
+ }
+
+ /// <summary>
+ /// Creates the evaluator that turns one matched list (list text = $1, first marker = $3)
+ /// into an ul or ol element.
+ /// </summary>
+ /// <param name="isInsideParagraphlessListItem">Passed through to ProcessListItems().</param>
+ private MatchEvaluator GetListEvaluator(bool isInsideParagraphlessListItem = false)
+ {
+     return new MatchEvaluator(match =>
+     {
+         string list = match.Groups[1].Value;
+         // an unordered marker selects ul; anything else is treated as ordered
+         bool unordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);
+         string listType = unordered ? "ul" : "ol";
+
+         string items = ProcessListItems(list, unordered ? _markerUL : _markerOL, isInsideParagraphlessListItem);
+
+         // the closing tag needs its "</"; without it the output was "<ul>...ul>"
+         return string.Format("<{0}>\n{1}</{0}>\n", listType, items);
+     });
+ }
+
+ /// <summary>
+ /// Process the contents of a single ordered or unordered list, splitting it
+ /// into individual list items and wrapping each one in li tags.
+ /// </summary>
+ /// <param name="list">The text of the whole list.</param>
+ /// <param name="marker">Regex source for this list's item marker.</param>
+ /// <param name="isInsideParagraphlessListItem">
+ /// true when called for a sub-list of an item that is not block-processed; span
+ /// processing is then left to the outermost item.
+ /// </param>
+ /// <returns>The list items as HTML, one per line.</returns>
+ private string ProcessListItems(string list, string marker, bool isInsideParagraphlessListItem = false)
+ {
+     // _listLevel tracks list nesting: incremented on entering a list,
+     // decremented on leaving; zero means we are not inside a list.
+     //
+     // Outside a list, text such as
+     //
+     //     I recommend upgrading to version
+     //     8. Oops, now this line is treated
+     //     as a sub-list.
+     //
+     // must stay a single paragraph even though the second line starts with a
+     // digit-period-space sequence. Inside a list (or sub-list) the same line
+     // starts a sub-list. It is a kludge, but this part of Markdown's syntax
+     // cannot be parsed perfectly without guessing the author's intent.
+
+     _listLevel++;
+
+     // Trim trailing blank lines:
+     list = Regex.Replace(list, @"\n{2,}\z", "\n");
+
+     string pattern = string.Format(
+ @"(^[ ]*) # leading whitespace = $1
+ ({0}) [ ]+ # list marker = $2
+ ((?s:.+?) # list item text = $3
+ (\n+))
+ (?= (\z | \1 ({0}) [ ]+))", marker);
+
+     bool lastItemHadADoubleNewline = false;
+
+     // has to be a closure, so subsequent invocations can share the bool
+     MatchEvaluator ListItemEvaluator = (Match match) =>
+     {
+         string item = match.Groups[3].Value;
+
+         // ordinal: this is a character test, not a linguistic comparison
+         bool endsWithDoubleNewline = item.EndsWith("\n\n", StringComparison.Ordinal);
+         bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n");
+
+         if (containsDoubleNewline || lastItemHadADoubleNewline)
+         {
+             // blank-line separated item: full block processing
+             // (we could correct any bad indentation here..)
+             item = RunBlockGamut(Outdent(item) + "\n", unhash: false);
+         }
+         else
+         {
+             // recursion for sub-lists
+             item = DoLists(Outdent(item), isInsideParagraphlessListItem: true);
+             item = item.TrimEnd('\n');
+             // only the outer-most item should run this, otherwise it's run multiple times for the inner ones
+             if (!isInsideParagraphlessListItem)
+                 item = RunSpanGamut(item);
+         }
+         lastItemHadADoubleNewline = endsWithDoubleNewline;
+         // each item is wrapped in li tags; the wrapper had been lost
+         return string.Format("<li>{0}</li>\n", item);
+     };
+
+     list = Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator),
+         RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
+     _listLevel--;
+     return list;
+ }
+
+ private static Regex _codeBlock = new Regex(string.Format(@"
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space
+ (?:
+ (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc",
+ _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Converts Markdown code indented by a tab-width of spaces into HTML pre code blocks
+ /// </summary>
+ private string DoCodeBlocks(string text)
+ {
+     return _codeBlock.Replace(text, CodeBlockEvaluator);
+ }
+
+ /// <summary>
+ /// Builds the pre/code element for one indented code block ($1): the block is
+ /// outdented, encoded, stripped of leading/trailing newlines and wrapped.
+ /// </summary>
+ /// <returns>The HTML code block surrounded by blank lines.</returns>
+ private string CodeBlockEvaluator(Match match)
+ {
+     string codeBlock = match.Groups[1].Value;
+
+     codeBlock = EncodeCode(Outdent(codeBlock));
+     codeBlock = _newlinesLeadingTrailing.Replace(codeBlock, "");
+
+     // the wrapper literals were split across lines with their tags missing
+     return string.Concat("\n\n<pre><code>", codeBlock, "\n</code></pre>\n\n");
+ }
+
+ private static Regex _codeSpan = new Regex(@"
+ (?
+ /// Turn Markdown `code spans` into HTML code tags
+ ///
+ private string DoCodeSpans(string text)
+ {
+     // * You can use multiple backticks as the delimiters if you want to
+     //   include literal backticks in the code span. So, this input:
+     //
+     //       Just type ``foo `bar` baz`` at the prompt.
+     //
+     //   Will translate to:
+     //
+     //       <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+     //
+     //   There's no arbitrary limit to the number of backticks you
+     //   can use as delimiters. If you need three consecutive backticks
+     //   in your code, use four for delimiters, etc.
+     //
+     // * You can use spaces to get literal backticks at the edges:
+     //
+     //       ... type `` `bar` `` ...
+     //
+     //   Turns to:
+     //
+     //       ... type <code>`bar`</code> ...
+     //
+
+     return _codeSpan.Replace(text, new MatchEvaluator(CodeSpanEvaluator));
+ }
+
+ /// <summary>
+ /// Builds the code element for one code span ($2): surrounding spaces are
+ /// trimmed, the content is encoded and its "://" sequences are masked.
+ /// </summary>
+ private string CodeSpanEvaluator(Match match)
+ {
+     string span = match.Groups[2].Value;
+     span = Regex.Replace(span, @"^[ ]*", ""); // leading whitespace
+     span = Regex.Replace(span, @"[ ]*$", ""); // trailing whitespace
+     span = EncodeCode(span);
+     span = SaveFromAutoLinking(span); // to prevent auto-linking. Not necessary in code *blocks*, but in code spans.
+
+     // the wrapper literals were split across lines with their tags missing
+     return string.Concat("<code>", span, "</code>");
+ }
+
+
+ private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+ private static Regex _strictBold = new Regex(@"(^|[\W_])(?:(?!\1)|(?=^))(\*|_)\2(?=\S)(.*?\S)\2\2(?!\2)(?=[\W_]|$)",
+ RegexOptions.Singleline | RegexOptions.Compiled);
+
+ private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+ private static Regex _strictItalic = new Regex(@"(^|[\W_])(?:(?!\1)|(?=^))(\*|_)(?=\S)((?:(?!\2).)*?\S)\2(?!\2)(?=[\W_]|$)",
+ RegexOptions.Singleline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn Markdown *italics* and **bold** into HTML strong and em tags
+ /// </summary>
+ /// <remarks>
+ /// The strict patterns capture the preceding character in $1 and the content in $3;
+ /// the relaxed patterns capture the content in $2. The replacement strings had lost
+ /// their tags, which made this method strip the markers without emitting markup.
+ /// </remarks>
+ private string DoItalicsAndBold(string text)
+ {
+     // <strong> must go first, then <em>
+     if (_strictBoldItalic)
+     {
+         text = _strictBold.Replace(text, "$1<strong>$3</strong>");
+         text = _strictItalic.Replace(text, "$1<em>$3</em>");
+     }
+     else
+     {
+         text = _bold.Replace(text, "<strong>$2</strong>");
+         text = _italic.Replace(text, "<em>$2</em>");
+     }
+     return text;
+ }
+
+ ///
+ /// Turn markdown line breaks (two space at end of line) into HTML break tags
+ ///
+ private string DoHardBreaks(string text)
+ {
+ if (_autoNewlines)
+ text = Regex.Replace(text, @"\n", string.Format("
[ ]? # '>' at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Converts Markdown > quoted blocks into HTML blockquote blocks
+ /// </summary>
+ private string DoBlockQuotes(string text)
+ {
+     MatchEvaluator quoter = BlockQuoteEvaluator;
+     return _blockquote.Replace(text, quoter);
+ }
+
+ /// <summary>
+ /// Builds the blockquote element for one quoted block ($1) and stores it as a
+ /// hashed HTML block.
+ /// </summary>
+ /// <returns>The hash key of the stored blockquote, surrounded by blank lines.</returns>
+ private string BlockQuoteEvaluator(Match match)
+ {
+     string bq = match.Groups[1].Value;
+
+     bq = Regex.Replace(bq, @"^[ ]*>[ ]?", "", RegexOptions.Multiline); // trim one level of quoting
+     bq = Regex.Replace(bq, @"^[ ]+$", "", RegexOptions.Multiline); // trim whitespace-only lines
+     bq = RunBlockGamut(bq); // recurse
+
+     // indent every line of the quoted content by two spaces
+     bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline);
+
+     // These leading spaces screw with <pre> content, so we need to fix that:
+     // NOTE(review): the <pre> delimiters are reconstructed; the literal in the file was broken
+     bq = Regex.Replace(bq, @"(\s*<pre>.+?</pre>)", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+
+     bq = string.Format("<blockquote>\n{0}\n</blockquote>", bq);
+     string key = GetHashKey(bq, isHtmlBlock: true);
+     _htmlBlocks[key] = bq;
+
+     return "\n\n" + key + "\n\n";
+ }
+
+ /// <summary>
+ /// Removes the leading space that the blockquote indentation added to each
+ /// line of the matched content ($1).
+ /// </summary>
+ private string BlockQuoteEvaluator2(Match match)
+ {
+     string indented = match.Groups[1].Value;
+     return Regex.Replace(indented, @"^ ", "", RegexOptions.Multiline);
+ }
+
+ private const string _charInsideUrl = @"[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;" + "\x1a]";
+ private const string _charEndingUrl = "[-A-Z0-9+&@#/%=~_|\\[\\])]";
+
+ private static Regex _autolinkBare = new Regex(@"(<|="")?\b(https?|ftp)(://" + _charInsideUrl + "*" + _charEndingUrl + ")(?=$|\\W)",
+ RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ private static Regex _endCharRegex = new Regex(_charEndingUrl, RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Wraps a bare URL match in angle brackets, moving unbalanced closing
+ /// parentheses at its end (and a then-invalid final character) outside the link.
+ /// </summary>
+ private static string handleTrailingParens(Match match)
+ {
+     // Group 1 acts as a negative lookbehind: when the URL is preceded by < or =",
+     // it is returned untouched. A real lookbehind is not used, because for a link
+     // nested inside another link the outer link would then never be matched and
+     // the inner URL would be picked up on its own. With this simulated lookbehind
+     // the full link is consumed here (and simply not rewritten), so the inner URL
+     // is not matched again.
+     if (match.Groups[1].Success)
+     {
+         return match.Value;
+     }
+
+     string scheme = match.Groups[2].Value;
+     string rest = match.Groups[3].Value;
+
+     if (!rest.EndsWith(")"))
+     {
+         return "<" + scheme + rest + ">";
+     }
+
+     // track paren nesting; a "(" seen at or below zero restarts the count at one
+     int depth = 0;
+     foreach (Match paren in Regex.Matches(rest, "[()]"))
+     {
+         if (paren.Value != "(")
+         {
+             depth--;
+         }
+         else
+         {
+             depth = depth <= 0 ? 1 : depth + 1;
+         }
+     }
+
+     string tail = "";
+     if (depth < 0)
+     {
+         // cut up to -depth surplus closing parens off the end
+         rest = Regex.Replace(rest, @"\){1," + (-depth) + "}$", m => { tail = m.Value; return ""; });
+     }
+
+     if (tail.Length > 0)
+     {
+         // the new final character must still be one that may end a URL
+         char last = rest[rest.Length - 1];
+         if (!_endCharRegex.IsMatch(last.ToString()))
+         {
+             tail = last + tail;
+             rest = rest.Substring(0, rest.Length - 1);
+         }
+     }
+
+     return "<" + scheme + rest + ">" + tail;
+ }
+
+ /// <summary>
+ /// Converts angle-delimited URLs (and, when enabled, bare URLs and e-mail
+ /// addresses) into HTML anchor tags
+ /// </summary>
+ /// <remarks>
+ /// &lt;http://www.example.com&gt;
+ /// </remarks>
+ private string DoAutoLinks(string text)
+ {
+     string linked = text;
+
+     if (_autoHyperlink)
+     {
+         // Wrap bare URLs in Markdown < > so the next step links them as well.
+         // URLs that are already part of a link are skipped by the evaluator.
+         linked = _autolinkBare.Replace(linked, handleTrailingParens);
+     }
+
+     // Hyperlinks:
+     linked = Regex.Replace(linked, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator));
+
+     if (!_linkEmails)
+     {
+         return linked;
+     }
+
+     // Email addresses:
+     string emailPattern =
+ @"<
+ (?:mailto:)?
+ (
+ [-.\w]+
+ \@
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+ )
+ >";
+     return Regex.Replace(linked, emailPattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
+ }
+
+ /// <summary>
+ /// Builds the anchor tag for an angle-delimited URL ($1): the escaped URL
+ /// becomes the href, the URL as written becomes the link text.
+ /// </summary>
+ private string HyperlinkEvaluator(Match match)
+ {
+     string link = match.Groups[1].Value;
+     // argument {0} (the escaped href) was previously passed but never used
+     return string.Format("<a href=\"{0}\">{1}</a>", EscapeBoldItalic(EncodeProblemUrlChars(link)), link);
+ }
+
+ /// <summary>
+ /// Builds an obfuscated mailto: anchor for one angle-delimited email address match.
+ /// </summary>
+ /// <param name="match">A match whose group 1 holds the email address.</param>
+ /// <returns>An anchor whose href is the encoded "mailto:" address and whose visible text is the encoded address alone.</returns>
+ private string EmailEvaluator(Match match)
+ {
+     string email = Unescape(match.Groups[1].Value);
+
+     //
+     //    Input: an email address, e.g. "foo@example.com"
+     //
+     //    Output: the email address as a mailto link, with each character
+     //            of the address encoded as either a decimal or hex entity, in
+     //            the hopes of foiling most address harvesting spam bots. E.g.:
+     //
+     //      <a href="&#x6d;&#97;&#105;...:&#102;&#x6f;...">&#102;&#x6f;...</a>
+     //
+     //    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+     //    mailing list.
+     //
+     email = "mailto:" + email;
+
+     // leave ':' alone (to spot mailto: later)
+     email = EncodeEmailAddress(email);
+
+     // The encoded address serves as both the href and the visible text. The previous
+     // format string was just "{0}", so no anchor was emitted and the "\">" that the
+     // strip below looks for never existed.
+     email = string.Format("<a href=\"{0}\">{0}</a>", email);
+
+     // strip the mailto: from the visible part
+     email = Regex.Replace(email, "\">.+?:", "\">");
+     return email;
+ }
+
+
+ // Matches between one and _tabWidth spaces at the start of every line (Multiline).
+ private static Regex _outDent = new Regex(
+     @"^[ ]{1," + _tabWidth + @"}",
+     RegexOptions.Multiline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Remove one level of line-leading spaces (at most _tabWidth spaces per line)
+ /// </summary>
+ /// <param name="block">The text to outdent.</param>
+ /// <returns>The text with the leading indentation of each line stripped.</returns>
+ private string Outdent(string block)
+ {
+     string outdented = _outDent.Replace(block, string.Empty);
+     return outdented;
+ }
+
+
+ #region Encoding and Normalization
+
+
+ /// <summary>
+ /// encodes email address randomly
+ /// roughly 10% raw, 45% hex, 45% dec
+ /// note that @ is always encoded and : never is
+ /// </summary>
+ /// <param name="addr">The address text to obfuscate (may include a "mailto:" prefix).</param>
+ /// <returns>The address with most characters replaced by hex or decimal character references.</returns>
+ private string EncodeEmailAddress(string addr)
+ {
+     var sb = new StringBuilder(addr.Length * 5);
+     var rand = new Random();
+     int r;
+     foreach (char c in addr)
+     {
+         r = rand.Next(1, 100);
+         if ((r > 90 || c == ':') && c != '@')
+             sb.Append(c);                           // m
+         else if (r < 45)
+             sb.AppendFormat("&#x{0:x};", (int)c);   // &#x6d;
+         else
+             sb.AppendFormat("&#{0};", (int)c);      // &#109;
+     }
+     return sb.ToString();
+ }
+
+ // Characters that must be neutralised inside code: & < > \ * _ { } [ ]
+ private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);
+
+ /// <summary>
+ /// Encode/escape certain Markdown characters inside code blocks and spans where they are literals
+ /// </summary>
+ /// <param name="code">The raw content of a code block or code span.</param>
+ /// <returns>The content with HTML-significant characters entity-encoded and Markdown-significant characters replaced by their escape hashes.</returns>
+ private string EncodeCode(string code)
+ {
+     return _codeEncoder.Replace(code, EncodeCodeEvaluator);
+ }
+
+ /// <summary>
+ /// Maps a single character matched by _codeEncoder to its replacement.
+ /// </summary>
+ private string EncodeCodeEvaluator(Match match)
+ {
+     switch (match.Value)
+     {
+         // Encode all ampersands; HTML entities are not
+         // entities within a Markdown code span.
+         case "&":
+             return "&amp;";
+         // Do the angle bracket song and dance
+         case "<":
+             return "&lt;";
+         case ">":
+             return "&gt;";
+         // escape characters that are magic in Markdown
+         default:
+             return _escapeTable[match.Value];
+     }
+ }
+
+
+ // An ampersand that is NOT the start of a numeric (&#123; / &#x1F;) or named (&name;) entity.
+ private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
+ // A left angle bracket that is NOT followed by a letter, '/', '?', '$' or '!' (i.e. does not look like a tag opener).
+ private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Encode any ampersands (that aren't part of an HTML entity) and any left angle
+ /// brackets that do not look like the start of a tag
+ /// </summary>
+ /// <param name="s">The text to encode.</param>
+ /// <returns>The text with stray ampersands replaced by &amp;amp; and stray left angle brackets replaced by &amp;lt;.</returns>
+ private string EncodeAmpsAndAngles(string s)
+ {
+     s = _amps.Replace(s, "&amp;");
+     s = _angles.Replace(s, "&lt;");
+     return s;
+ }
+
+ // Alternation of every backslash-escaped character; assigned in the static constructor.
+ private static Regex _backslashEscapes;
+
+ /// <summary>
+ /// Encodes any escaped characters such as \`, \*, \[ etc
+ /// </summary>
+ /// <param name="s">The text to scan for backslash escapes.</param>
+ /// <returns>The text with every backslash escape replaced by its entry in _backslashEscapeTable.</returns>
+ private string EscapeBackslashes(string s)
+ {
+     MatchEvaluator lookup = EscapeBackslashesEvaluator;
+     return _backslashEscapes.Replace(s, lookup);
+ }
+
+ /// <summary>
+ /// Looks up the replacement registered for one matched backslash escape.
+ /// </summary>
+ private string EscapeBackslashesEvaluator(Match match)
+ {
+     string escapeSequence = match.Value;
+     return _backslashEscapeTable[escapeSequence];
+ }
+
+ // Matches an escape token: \x1A, 'E', one or more digits, 'E'. The literal is split
+ // in two so that the 'E' is not parsed as part of the \x hex escape.
+ private static Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled);
+
+ /// <summary>
+ /// swap back in all the special characters we've hidden
+ /// </summary>
+ /// <param name="s">Text that may contain escape tokens.</param>
+ /// <returns>The text with every escape token replaced by its entry in _invertedEscapeTable.</returns>
+ private string Unescape(string s)
+ {
+     MatchEvaluator restore = UnescapeEvaluator;
+     return _unescapes.Replace(s, restore);
+ }
+
+ /// <summary>
+ /// Looks up the original character(s) for one matched escape token.
+ /// </summary>
+ private string UnescapeEvaluator(Match match)
+ {
+     string token = match.Value;
+     return _invertedEscapeTable[token];
+ }
+
+
+ /// <summary>
+ /// escapes Bold [ * ] and Italic [ _ ] characters
+ /// </summary>
+ /// <param name="s">The text in which * and _ should be replaced.</param>
+ /// <returns>The text with every * and _ replaced by its _escapeTable entry.</returns>
+ private string EscapeBoldItalic(string s)
+ {
+     // Asterisks first, then underscores.
+     return s.Replace("*", _escapeTable["*"])
+             .Replace("_", _escapeTable["_"]);
+ }
+
+ /// <summary>
+ /// Encodes the characters &gt;, &lt; and the double quote as HTML entities
+ /// so the text can be placed inside a double-quoted attribute value.
+ /// Ampersands are left as they are.
+ /// </summary>
+ /// <param name="s">The raw attribute text.</param>
+ /// <returns>The text with &gt;, &lt; and double quotes entity-encoded.</returns>
+ private static string AttributeEncode(string s)
+ {
+     return s.Replace(">", "&gt;").Replace("<", "&lt;").Replace("\"", "&quot;");
+ }
+
+ // Characters that get percent-encoded in URLs: " ' * ( ) [ ] $ :
+ private static readonly char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();
+
+ /// <summary>
+ /// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems
+ /// </summary>
+ /// <remarks>
+ /// A colon is left alone when the next character is '/' or a digit; a colon in the
+ /// last position is encoded. Returns the input untouched when
+ /// _encodeProblemUrlCharacters is off.
+ /// </remarks>
+ private string EncodeProblemUrlChars(string url)
+ {
+     if (!_encodeProblemUrlCharacters)
+     {
+         return url;
+     }
+
+     var encoded = new StringBuilder(url.Length);
+     int lastIndex = url.Length - 1;
+
+     for (int pos = 0; pos <= lastIndex; pos++)
+     {
+         char current = url[pos];
+         bool isProblem = Array.IndexOf(_problemUrlChars, current) != -1;
+
+         if (isProblem && current == ':' && pos < lastIndex)
+         {
+             // presumably protects "scheme:/" separators and ":8080"-style ports -- confirm
+             char next = url[pos + 1];
+             bool keepColon = next == '/' || (next >= '0' && next <= '9');
+             isProblem = !keepColon;
+         }
+
+         if (!isProblem)
+         {
+             encoded.Append(current);
+             continue;
+         }
+
+         encoded.Append("%" + String.Format("{0:x}", (byte)current));
+     }
+
+     return encoded.ToString();
+ }
+
+
+ /// <summary>
+ /// Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
+ /// don't conflict with their use in Markdown for code, italics and strong.
+ /// We're replacing each such character with its corresponding hash
+ /// value; this is likely overkill, but it should prevent us from colliding
+ /// with the escape values by accident.
+ /// </summary>
+ /// <param name="text">Text that may contain HTML tags.</param>
+ /// <returns>The text rebuilt from its tokens, with the special characters inside tag tokens replaced by their escape hashes.</returns>
+ private string EscapeSpecialCharsWithinTagAttributes(string text)
+ {
+     var tokens = TokenizeHTML(text);
+
+     // now, rebuild text from the tokens
+     var sb = new StringBuilder(text.Length);
+
+     foreach (var token in tokens)
+     {
+         string value = token.Value;
+
+         if (token.Type == TokenType.Tag)
+         {
+             value = value.Replace(@"\", _escapeTable[@"\"]);
+
+             // NOTE(review): the two statements below were fused into one non-compiling
+             // line (everything from "<!" up to the closing ">" of "</?code>" was lost).
+             // They are reconstructed from the surviving fragments -- confirm against
+             // the pristine source.
+
+             // Presumably: escape slashes in "<!..." tokens (comments / declarations)
+             // so that URLs inside them are not auto-linked.
+             if (_autoHyperlink && value.StartsWith("<!"))
+                 value = value.Replace("/", _escapeTable["/"]);
+
+             // A <code> or </code> that has at least one character on both sides of it
+             // inside the tag token is replaced with the backtick hash.
+             value = Regex.Replace(value, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);
+             value = EscapeBoldItalic(value);
+         }
+
+         sb.Append(value);
+     }
+
+     return sb.ToString();
+ }
+
+ /// <summary>
+ /// convert all tabs to spaces, padding to the next multiple of _tabWidth;
+ /// standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF);
+ /// makes sure text ends with a couple of newlines;
+ /// empties any line that holds only spaces or tabs (its newline is kept);
+ /// drops every \x1A character
+ /// </summary>
+ /// <param name="text">The raw input text.</param>
+ /// <returns>The normalized text.</returns>
+ private string Normalize(string text)
+ {
+     var result = new StringBuilder(text.Length);
+     var current = new StringBuilder();   // the line being assembled
+     bool hasContent = false;             // true once the line holds a character other than a space / expanded tab
+
+     for (int pos = 0; pos < text.Length; pos++)
+     {
+         char ch = text[pos];
+
+         // A line ends at LF, or at a CR that is followed by something other than LF.
+         // (The CR of a CR LF pair, and a CR in the very last position, are dropped.)
+         bool lineBreak = ch == '\n'
+             || (ch == '\r' && pos + 1 < text.Length && text[pos + 1] != '\n');
+
+         if (lineBreak)
+         {
+             if (hasContent)
+             {
+                 result.Append(current);
+             }
+             result.Append('\n');
+             current.Length = 0;
+             hasContent = false;
+         }
+         else if (ch == '\t')
+         {
+             // pad with spaces up to the next tab stop
+             int pad = _tabWidth - current.Length % _tabWidth;
+             for (int k = 0; k < pad; k++)
+             {
+                 current.Append(' ');
+             }
+         }
+         else if (ch != '\r' && ch != '\x1A')
+         {
+             if (ch != ' ')
+             {
+                 hasContent = true;
+             }
+             current.Append(ch);
+         }
+     }
+
+     // flush the final (unterminated) line
+     if (hasContent)
+     {
+         result.Append(current);
+     }
+     result.Append('\n');
+
+     // add two newlines to the end before return
+     result.Append("\n\n");
+     return result.ToString();
+ }
+
+ #endregion
+
+ /// <summary>
+ /// this is to emulate what's available in PHP
+ /// </summary>
+ /// <param name="text">The string to repeat.</param>
+ /// <param name="count">How many copies to concatenate.</param>
+ /// <returns><paramref name="text"/> concatenated <paramref name="count"/> times.</returns>
+ private static string RepeatString(string text, int count)
+ {
+     var repeated = new StringBuilder(text.Length * count);
+     for (int remaining = count; remaining > 0; remaining--)
+     {
+         repeated.Append(text);
+     }
+     return repeated.ToString();
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/Katteker.Gui/Properties/AssemblyInfo.cs b/Katteker.Gui/Properties/AssemblyInfo.cs
index 6c30bf7..d2ccb21 100644
--- a/Katteker.Gui/Properties/AssemblyInfo.cs
+++ b/Katteker.Gui/Properties/AssemblyInfo.cs
@@ -31,6 +31,5 @@ using System.Runtime.InteropServices;
// Sie können alle Werte angeben oder die standardmäßigen Build- und Revisionsnummern
// übernehmen, indem Sie "*" eingeben:
// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("1.0.0.0")]
-[assembly: AssemblyFileVersion("1.0.0.0")]
-[assembly: AssemblyInformationalVersion("1.0.0")]
+[assembly: AssemblyVersion("1.0.0")]
+[assembly: AssemblyFileVersion("1.0.0")]
diff --git a/Katteker.Gui/packages.config b/Katteker.Gui/packages.config
deleted file mode 100644
index 8f5839b..0000000
--- a/Katteker.Gui/packages.config
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/Katteker/Properties/AssemblyInfo.cs b/Katteker/Properties/AssemblyInfo.cs
index 9d91de6..d98e725 100644
--- a/Katteker/Properties/AssemblyInfo.cs
+++ b/Katteker/Properties/AssemblyInfo.cs
@@ -31,5 +31,5 @@ using System.Runtime.InteropServices;
// Sie können alle Werte angeben oder Standardwerte für die Build- und Revisionsnummern verwenden,
// indem Sie "*" wie unten gezeigt eingeben:
// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("1.0.0.0")]
-[assembly: AssemblyFileVersion("1.0.0.0")]
+[assembly: AssemblyVersion("1.0.0")]
+[assembly: AssemblyFileVersion("1.0.0")]