diff --git a/Creator/PathFragments.cs b/Creator/PathFragments.cs index 2d48011..9635620 100644 --- a/Creator/PathFragments.cs +++ b/Creator/PathFragments.cs @@ -9,7 +9,7 @@ namespace KattekerCreator public PathFragments(string path) { - _fragments = path.Split(Path.PathSeparator); + _fragments = path.Split(Path.DirectorySeparatorChar); } public int FragmentLength => _fragments.Length; @@ -18,7 +18,7 @@ namespace KattekerCreator public override int GetHashCode() => (_fragments != null ? _fragments.GetHashCode() : 0); - public override string ToString() => string.Join(Path.PathSeparator.ToString(), _fragments.Take(_fragments.Length - 1)); + public override string ToString() => string.Join(Path.DirectorySeparatorChar.ToString(), _fragments.Take(_fragments.Length - 1)); protected bool Equals(PathFragments other) => Equals(_fragments, other._fragments); } diff --git a/Creator/Program.cs b/Creator/Program.cs index 890fcf5..3ff18b5 100644 --- a/Creator/Program.cs +++ b/Creator/Program.cs @@ -69,14 +69,11 @@ namespace KattekerCreator //Start makensis.exe var setupFilePath = CompileSetupScript(templateFile); //Copy to Output-Folder - if (CopyToOutputFolder(setupFilePath)) - { - //Create/Modify RELEASE File - var releaseEntry = AddPackageToReleaseFile(setupFilePath); - //Copy installer as setup.exe - CopyAsSetup(setupFilePath, releaseEntry); - } - + CopyToOutputFolder(setupFilePath); + //Create/Modify RELEASE File + var releaseEntry = AddPackageToReleaseFile(setupFilePath); + //Copy installer as setup.exe + CopyAsSetup(setupFilePath, releaseEntry); return 0; } @@ -103,31 +100,20 @@ namespace KattekerCreator } } - private bool CopyToOutputFolder(string setupFilePath) + private void CopyToOutputFolder(string setupFilePath) { - try + if (setupFilePath == null) throw new ArgumentNullException(nameof(setupFilePath)); + var setupFile = Path.GetFileName(setupFilePath); + if (string.IsNullOrEmpty(setupFile)) throw new ArgumentException(); + if (!File.Exists(setupFilePath)) throw new FileNotFoundException(setupFile); + if (!Directory.Exists(_appArguments.OutputDir)) Directory.CreateDirectory(_appArguments.OutputDir); + if (!string.IsNullOrEmpty(_appArguments.ChangeLog)) { - if (setupFilePath == null) throw new ArgumentNullException(nameof(setupFilePath)); - var setupFile = Path.GetFileName(setupFilePath); - if (string.IsNullOrEmpty(setupFile)) throw new ArgumentException(); - if (!File.Exists(setupFilePath)) throw new FileNotFoundException(setupFile); - if (!string.IsNullOrEmpty(_appArguments.ChangeLog)) - { - - var changeLogPath = Path.Combine(Path.GetDirectoryName(_appArguments.ProgramFile), _appArguments.ChangeLog); - if (!File.Exists(changeLogPath)) throw new FileNotFoundException(changeLogPath); - File.Copy(changeLogPath, Path.Combine(_appArguments.OutputDir, Path.GetFileName(_appArguments.ChangeLog) ?? throw new InvalidOperationException()), true); - } - - if (!Directory.Exists(_appArguments.OutputDir)) Directory.CreateDirectory(_appArguments.OutputDir); - File.Copy(setupFilePath, Path.Combine(_appArguments.OutputDir, setupFile), true); - return true; - } - catch (Exception e) - { - Log.WriteErrorLine(e.Message); - return false; + var changeLogPath = Path.Combine(Path.GetDirectoryName(_appArguments.ProgramFile), _appArguments.ChangeLog); + if (!File.Exists(changeLogPath)) throw new FileNotFoundException(changeLogPath); + File.Copy(changeLogPath, Path.Combine(_appArguments.OutputDir, _appArguments.ChangeLog), true); } + File.Copy(setupFilePath, Path.Combine(_appArguments.OutputDir, setupFile), true); } private static string CompileSetupScript(string templateFile) diff --git a/Example/MainWindow.xaml b/Example/MainWindow.xaml index 927a3c1..d721f83 100644 --- a/Example/MainWindow.xaml +++ b/Example/MainWindow.xaml @@ -8,10 +8,11 @@ Title="MainWindow" Width="800" Height="450" - Background="DarkBlue" + Background="BlueViolet" ContentRendered="MainWindow_OnContentRendered" mc:Ignorable="d"> + \ No newline at end of file diff --git a/Example/Properties/AssemblyInfo.cs b/Example/Properties/AssemblyInfo.cs index 2ed501b..1c3e88a 100644 --- a/Example/Properties/AssemblyInfo.cs +++ b/Example/Properties/AssemblyInfo.cs @@ -49,5 +49,5 @@ using System.Windows; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.36.0")] -[assembly: AssemblyFileVersion("1.0.36.0")] +[assembly: AssemblyVersion("1.0.42")] +[assembly: AssemblyFileVersion("1.0.42")] diff --git a/Katteker.Gui/ChangelogHelper.cs b/Katteker.Gui/ChangelogHelper.cs index 2adff46..e16f090 100644 --- a/Katteker.Gui/ChangelogHelper.cs +++ b/Katteker.Gui/ChangelogHelper.cs @@ -7,7 +7,7 @@ namespace Katteker.Gui { internal static class ChangelogHelper { - private static string GenerateHtmlifyChangelog(string text, string extension) + private static string GenerateChangelogAsHtml(string text, string extension) { string result; switch (extension) @@ -17,7 +17,7 @@ namespace Katteker.Gui result = plainText.Replace(Environment.NewLine, "
"); break; case ".md": - result = CommonMark.CommonMarkConverter.Convert(text); + result = new MarkdownSharp.Markdown().Transform(text); break; default: result = text; @@ -39,23 +39,23 @@ namespace Katteker.Gui using (var response = await webReq.GetResponseAsync().ConfigureAwait(false)) using (var sr = new StreamReader(response.GetResponseStream())) { - return GenerateHtmlifyChangelog(await sr.ReadToEndAsync().ConfigureAwait(false), + return GenerateChangelogAsHtml(await sr.ReadToEndAsync().ConfigureAwait(false), Path.GetExtension(filename)); } } catch (WebException) { var changelogFilename = Path.GetFileName(filename); - if (changelogFilename == null) return GenerateHtmlifyChangelog("Changelog not found", ".txt"); + if (changelogFilename == null) return GenerateChangelogAsHtml("Changelog not found", ".txt"); var currentChangelogPath = Path.Combine(Environment.CurrentDirectory, changelogFilename); if (File.Exists(currentChangelogPath)) { - return GenerateHtmlifyChangelog(File.ReadAllText(currentChangelogPath), Path.GetExtension(filename)); + return GenerateChangelogAsHtml(File.ReadAllText(currentChangelogPath), Path.GetExtension(filename)); } } } - return GenerateHtmlifyChangelog("Changelog not found", ".txt"); + return GenerateChangelogAsHtml("Changelog not found", ".txt"); } } } \ No newline at end of file diff --git a/Katteker.Gui/Katteker.Gui.csproj b/Katteker.Gui/Katteker.Gui.csproj index 943f9da..ced1c89 100644 --- a/Katteker.Gui/Katteker.Gui.csproj +++ b/Katteker.Gui/Katteker.Gui.csproj @@ -34,9 +34,6 @@ bin\Release\Katteker.Gui.xml - - ..\packages\CommonMark.NET.0.15.1\lib\net45\CommonMark.dll - @@ -46,6 +43,7 @@ + @@ -62,7 +60,6 @@ - diff --git a/Katteker.Gui/MarkdownSharp.cs b/Katteker.Gui/MarkdownSharp.cs new file mode 100644 index 0000000..345d2c1 --- /dev/null +++ b/Katteker.Gui/MarkdownSharp.cs @@ -0,0 +1,1779 @@ +/* + * MarkdownSharp + * ------------- + * a C# Markdown processor + * + * Markdown is a text-to-HTML conversion tool for web writers + * Copyright (c) 2004 John Gruber + * http://daringfireball.net/projects/markdown/ + * + * Markdown.NET + * Copyright (c) 2004-2009 Milan Negovan + * http://www.aspnetresources.com + * http://aspnetresources.com/blog/markdown_announced.aspx + * + * MarkdownSharp + * Copyright (c) 2009-2011 Jeff Atwood + * http://stackoverflow.com + * http://www.codinghorror.com/blog/ + * http://code.google.com/p/markdownsharp/ + * + * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it + * and let the community contribute to and improve MarkdownSharp. + * + */ + +#region Copyright and license + +/* + +Copyright (c) 2009 - 2010 Jeff Atwood + +http://www.opensource.org/licenses/mit-license.php + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Copyright (c) 2003-2004 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. +*/ + +#endregion + +using System; +using System.Collections.Generic; +using System.Configuration; +using System.Text; +using System.Text.RegularExpressions; + +namespace MarkdownSharp +{ + + public class MarkdownOptions + { + /// + /// when true, (most) bare plain URLs are auto-hyperlinked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoHyperlink { get; set; } + /// + /// when true, RETURN becomes a literal newline + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoNewlines { get; set; } + /// + /// use ">" for HTML output, or " />" for XHTML output + /// + public string EmptyElementSuffix { get; set; } + /// + /// when true, problematic URL characters like [, ], (, and so forth will be encoded + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool EncodeProblemUrlCharacters { get; set; } + /// + /// when false, email addresses will never be auto-linked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool LinkEmails { get; set; } + /// + /// when true, bold and italic require non-word characters on either side + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool StrictBoldItalic { get; set; } + } + + + /// + /// Markdown is a text-to-HTML conversion tool for web writers. + /// Markdown allows you to write using an easy-to-read, easy-to-write plain text format, + /// then convert it to structurally valid XHTML (or HTML). + /// + public class Markdown + { + private const string _version = "1.13"; + + #region Constructors and Options + + /// + /// Create a new Markdown instance using default options + /// + public Markdown() + { + } + + /// + /// Create a new Markdown instance and set the options from the MarkdownOptions object. + /// + public Markdown(MarkdownOptions options) + { + _autoHyperlink = options.AutoHyperlink; + _autoNewlines = options.AutoNewlines; + _emptyElementSuffix = options.EmptyElementSuffix; + _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters; + _linkEmails = options.LinkEmails; + _strictBoldItalic = options.StrictBoldItalic; + } + + + /// + /// use ">" for HTML output, or " />" for XHTML output + /// + public string EmptyElementSuffix + { + get { return _emptyElementSuffix; } + set { _emptyElementSuffix = value; } + } + private string _emptyElementSuffix = " />"; + + /// + /// when false, email addresses will never be auto-linked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool LinkEmails + { + get { return _linkEmails; } + set { _linkEmails = value; } + } + private bool _linkEmails = true; + + /// + /// when true, bold and italic require non-word characters on either side + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool StrictBoldItalic + { + get { return _strictBoldItalic; } + set { _strictBoldItalic = value; } + } + private bool _strictBoldItalic = false; + + /// + /// when true, RETURN becomes a literal newline + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoNewLines + { + get { return _autoNewlines; } + set { _autoNewlines = value; } + } + private bool _autoNewlines = false; + + /// + /// when true, (most) bare plain URLs are auto-hyperlinked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoHyperlink + { + get { return _autoHyperlink; } + set { _autoHyperlink = value; } + } + private bool _autoHyperlink = false; + + /// + /// when true, problematic URL characters like [, ], (, and so forth will be encoded + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool EncodeProblemUrlCharacters + { + get { return _encodeProblemUrlCharacters; } + set { _encodeProblemUrlCharacters = value; } + } + private bool _encodeProblemUrlCharacters = false; + + #endregion + + private enum TokenType { Text, Tag } + + private struct Token + { + public Token(TokenType type, string value) + { + this.Type = type; + this.Value = value; + } + public TokenType Type; + public string Value; + } + + /// + /// maximum nested depth of [] and () supported by the transform; implementation detail + /// + private const int _nestDepth = 6; + + /// + /// Tabs are automatically converted to spaces as part of the transform + /// this constant determines how "wide" those tabs become in spaces + /// + private const int _tabWidth = 4; + + private const string _markerUL = @"[*+-]"; + private const string _markerOL = @"\d+[.]"; + + private static readonly Dictionary _escapeTable; + private static readonly Dictionary _invertedEscapeTable; + private static readonly Dictionary _backslashEscapeTable; + + private readonly Dictionary _urls = new Dictionary(); + private readonly Dictionary _titles = new Dictionary(); + private readonly Dictionary _htmlBlocks = new Dictionary(); + + private int _listLevel; + private static string AutoLinkPreventionMarker = "\x1AP"; // temporarily replaces "://" where auto-linking shouldn't happen; + + /// + /// In the static constuctor we'll initialize what stays the same across all transforms. + /// + static Markdown() + { + // Table of hash values for escaped characters: + _escapeTable = new Dictionary(); + _invertedEscapeTable = new Dictionary(); + // Table of hash value for backslash escaped characters: + _backslashEscapeTable = new Dictionary(); + + string backslashPattern = ""; + + foreach (char c in @"\`*_{}[]()>#+-.!/") + { + string key = c.ToString(); + string hash = GetHashKey(key, isHtmlBlock: false); + _escapeTable.Add(key, hash); + _invertedEscapeTable.Add(hash, key); + _backslashEscapeTable.Add(@"\" + key, hash); + backslashPattern += Regex.Escape(@"\" + key) + "|"; + } + + _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled); + } + + /// + /// current version of MarkdownSharp; + /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute + /// + public string Version + { + get { return _version; } + } + + /// + /// Transforms the provided Markdown-formatted text to HTML; + /// see http://en.wikipedia.org/wiki/Markdown + /// + /// + /// The order in which other subs are called here is + /// essential. Link and image substitutions need to happen before + /// EscapeSpecialChars(), so that any *'s or _'s in the a + /// and img tags get encoded. + /// + public string Transform(string text) + { + if (String.IsNullOrEmpty(text)) return ""; + + Setup(); + + text = Normalize(text); + + text = HashHTMLBlocks(text); + text = StripLinkDefinitions(text); + text = RunBlockGamut(text); + text = Unescape(text); + + Cleanup(); + + return text + "\n"; + } + + + /// + /// Perform transformations that form block-level tags like paragraphs, headers, and list items. + /// + private string RunBlockGamut(string text, bool unhash = true) + { + text = DoHeaders(text); + text = DoHorizontalRules(text); + text = DoLists(text); + text = DoCodeBlocks(text); + text = DoBlockQuotes(text); + + // We already ran HashHTMLBlocks() before, in Markdown(), but that + // was to escape raw HTML in the original Markdown source. This time, + // we're escaping the markup we've just created, so that we don't wrap + //

tags around block-level tags. + text = HashHTMLBlocks(text); + + text = FormParagraphs(text, unhash: unhash); + + return text; + } + + + ///

+ /// Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items. + /// + private string RunSpanGamut(string text) + { + text = DoCodeSpans(text); + text = EscapeSpecialCharsWithinTagAttributes(text); + text = EscapeBackslashes(text); + + // Images must come first, because ![foo][f] looks like an anchor. + text = DoImages(text); + text = DoAnchors(text); + + // Must come after DoAnchors(), because you can use < and > + // delimiters in inline links like [this](). + text = DoAutoLinks(text); + + text = text.Replace(AutoLinkPreventionMarker, "://"); + + text = EncodeAmpsAndAngles(text); + text = DoItalicsAndBold(text); + text = DoHardBreaks(text); + + return text; + } + + private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled); + private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled); + private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled); + + private static Regex _htmlBlockHash = new Regex("\x1AH\\d+H", RegexOptions.Compiled); + + /// + /// splits on two or more newlines, to form "paragraphs"; + /// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off) or wrapped in HTML p tag + /// + private string FormParagraphs(string text, bool unhash = true) + { + // split on two or more newlines + string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, "")); + + for (int i = 0; i < grafs.Length; i++) + { + if (grafs[i].StartsWith("\x1AH")) + { + // unhashify HTML blocks + if (unhash) + { + int sanityCheck = 50; // just for safety, guard against an infinite loop + bool keepGoing = true; // as long as replacements where made, keep going + while (keepGoing && sanityCheck > 0) + { + keepGoing = false; + grafs[i] = _htmlBlockHash.Replace(grafs[i], match => + { + keepGoing = true; + return _htmlBlocks[match.Value]; + }); + sanityCheck--; + } + /* if (keepGoing) + { + // Logging of an infinite loop goes here. + // If such a thing should happen, please open a new issue on http://code.google.com/p/markdownsharp/ + // with the input that caused it. + }*/ + } + } + else + { + // do span level processing inside the block, then wrap result in

tags + grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "

") + "

"; + } + } + + return string.Join("\n\n", grafs); + } + + + private void Setup() + { + // Clear the global hashes. If we don't clear these, you get conflicts + // from other articles when generating a page which contains more than + // one article (e.g. an index page that shows the N most recent + // articles): + _urls.Clear(); + _titles.Clear(); + _htmlBlocks.Clear(); + _listLevel = 0; + } + + private void Cleanup() + { + Setup(); + } + + private static string _nestedBracketsPattern; + + /// + /// Reusable pattern to match balanced [brackets]. See Friedl's + /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331. + /// + private static string GetNestedBracketsPattern() + { + // in other words [this] and [this[also]] and [this[also[too]]] + // up to _nestDepth + if (_nestedBracketsPattern == null) + _nestedBracketsPattern = + RepeatString(@" + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + ", _nestDepth) + RepeatString( + @" \] + )*" + , _nestDepth); + return _nestedBracketsPattern; + } + + private static string _nestedParensPattern; + + /// + /// Reusable pattern to match balanced (parens). See Friedl's + /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331. + /// + private static string GetNestedParensPattern() + { + // in other words (this) and (this(also)) and (this(also(too))) + // up to _nestDepth + if (_nestedParensPattern == null) + _nestedParensPattern = + RepeatString(@" + (?> # Atomic matching + [^()\s]+ # Anything other than parens or whitespace + | + \( + ", _nestDepth) + RepeatString( + @" \) + )*" + , _nestDepth); + return _nestedParensPattern; + } + + private static Regex _linkDef = new Regex(string.Format(@" + ^[ ]{{0,{0}}}\[([^\[\]]+)\]: # id = $1 + [ ]* + \n? # maybe *one* newline + [ ]* + ? # url = $2 + [ ]* + \n? # maybe one newline + [ ]* + (?: + (?<=\s) # lookbehind for whitespace + [""(] + (.+?) # title = $3 + ["")] + [ ]* + )? # title is optional + (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// Strips link definitions from text, stores the URLs and titles in hash references. + /// + /// + /// ^[id]: url "optional title" + /// + private string StripLinkDefinitions(string text) + { + return _linkDef.Replace(text, new MatchEvaluator(LinkEvaluator)); + } + + private string LinkEvaluator(Match match) + { + string linkID = match.Groups[1].Value.ToLowerInvariant(); + _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value); + + if (match.Groups[3] != null && match.Groups[3].Length > 0) + _titles[linkID] = match.Groups[3].Value.Replace("\"", """); + + return ""; + } + + // compiling this monster regex results in worse performance. trust me. + private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + + + /// + /// derived pretty much verbatim from PHP Markdown + /// + private static string GetBlockPattern() + { + + // Hashify HTML blocks: + // We only want to do this for block-level HTML tags, such as headers, + // lists, and tables. That's because we still want to wrap

s around + // "paragraphs" that are wrapped in non-block-level tags, such as anchors, + // phrase emphasis, and spans. The list of tags we're looking for is + // hard-coded: + // + // * List "a" is made of tags which can be both inline or block-level. + // These will be treated block-level when the start tag is alone on + // its line, otherwise they're not matched here and will be taken as + // inline later. + // * List "b" is made of tags which are always block-level; + // + string blockTagsA = "ins|del"; + string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math"; + + // Regular expression for the content of a block tag. + string attr = @" + (?>    # optional tag attributes + \s   # starts with whitespace + (?> + [^>""/]+ # text outside quotes + | + /+(?!>)  # slash not followed by > + | + ""[^""]*""  # text inside double quotes (tolerate >) + | + '[^']*' # text inside single quotes (tolerate >) + )* + )? + "; + + string content = RepeatString(@" + (?> + [^<]+   # content without tag + | + <\2   # nested opening tag + " + attr + @" # attributes + (?> + /> + | + >", _nestDepth) + // end of opening tag + ".*?" + // last level nested tag content + RepeatString(@" +  # closing nested tag + ) + |    + <(?!/\2\s*> # other tags with a different name + ) + )*", _nestDepth); + + string content2 = content.Replace(@"\2", @"\3"); + + // First, look for nested blocks, e.g.: + // 

+ //  
+ //   tags for inner block must be indented. + //  
+ // 
+ // + // The outermost tags must start at the left margin for this to match, and + // the inner nested divs must be indented. + // We need to do this before the next, more liberal match, because the next + // match will start at the first `
` and stop at the first `
`. + string pattern = @" + (?> + (?> + (?<=\n) # Starting at the beginning of a line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + + # Match from `\n` to `\n`, handling nested tags + # in between. + + <($block_tags_b_re) # start tag = $2 + $attr> # attributes followed by > and \n + $content # content, support nesting + # the matching end tag + [ ]* # trailing spaces + (?=\n+|\Z) # followed by a newline or end of document + + | # Special version for tags of group a. + + <($block_tags_a_re) # start tag = $3 + $attr>[ ]*\n # attributes followed by > + $content2 # content, support nesting + # the matching end tag + [ ]* # trailing spaces + (?=\n+|\Z) # followed by a newline or end of document + + | # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + + [ ]{0,$less_than_tab} +
# the matching end tag + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + | # Special case for standalone HTML comments: + + (?<=\n\n|\A) # preceded by a blank line or start of document + [ ]{0,$less_than_tab} + (?s: + + ) + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + | # PHP and ASP-style processor instructions ( + ) + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + ) + )"; + + pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString()); + pattern = pattern.Replace("$block_tags_b_re", blockTagsB); + pattern = pattern.Replace("$block_tags_a_re", blockTagsA); + pattern = pattern.Replace("$attr", attr); + pattern = pattern.Replace("$content2", content2); + pattern = pattern.Replace("$content", content); + + return pattern; + } + + /// + /// replaces any block-level HTML blocks with hash entries + /// + private string HashHTMLBlocks(string text) + { + return _blocksHtml.Replace(text, new MatchEvaluator(HtmlEvaluator)); + } + + private string HtmlEvaluator(Match match) + { + string text = match.Groups[1].Value; + string key = GetHashKey(text, isHtmlBlock: true); + _htmlBlocks[key] = text; + + return string.Concat("\n\n", key, "\n\n"); + } + + private static string GetHashKey(string s, bool isHtmlBlock) + { + var delim = isHtmlBlock ? 'H' : 'E'; + return "\x1A" + delim + Math.Abs(s.GetHashCode()).ToString() + delim; + } + + private static Regex _htmlTokens = new Regex(@" + ()| # match + (<\?.*?\?>)| # match " + + RepeatString(@" + (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) + + " # match and ", + RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// returns an array of HTML tokens comprising the input string. Each token is + /// either a tag (possibly with nested, tags contained therein, such + /// as <a href="<MTFoo>">, or a run of text between tags. Each element of the + /// array is a two-element array; the first is either 'tag' or 'text'; the second is + /// the actual value. + /// + private List TokenizeHTML(string text) + { + int pos = 0; + int tagStart = 0; + var tokens = new List(); + + // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin. + // http://www.bradchoate.com/past/mtregex.php + foreach (Match m in _htmlTokens.Matches(text)) + { + tagStart = m.Index; + + if (pos < tagStart) + tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos))); + + tokens.Add(new Token(TokenType.Tag, m.Value)); + pos = tagStart + m.Length; + } + + if (pos < text.Length) + tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos))); + + return tokens; + } + + + private static Regex _anchorRef = new Regex(string.Format(@" + ( # wrap whole match in $1 + \[ + ({0}) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + private static Regex _anchorInline = new Regex(string.Format(@" + ( # wrap whole match in $1 + \[ + ({0}) # link text = $2 + \] + \( # literal paren + [ ]* + ({1}) # href = $3 + [ ]* + ( # $4 + (['""]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ ]* # ignore any spaces between closing quote and ) + )? # title is optional + \) + )", GetNestedBracketsPattern(), GetNestedParensPattern()), + RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + private static Regex _anchorRefShortcut = new Regex(@" + ( # wrap whole match in $1 + \[ + ([^\[\]]+) # link text = $2; can't contain [ or ] + \] + )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// Turn Markdown link shortcuts into HTML anchor tags + /// + /// + /// [link text](url "title") + /// [link text][id] + /// [id] + /// + private string DoAnchors(string text) + { + // First, handle reference-style links: [link text] [id] + text = _anchorRef.Replace(text, new MatchEvaluator(AnchorRefEvaluator)); + + // Next, inline-style links: [link text](url "optional title") or [link text](url "optional title") + text = _anchorInline.Replace(text, new MatchEvaluator(AnchorInlineEvaluator)); + + // Last, handle reference-style shortcuts: [link text] + // These must come last in case you've also got [link test][1] + // or [link test](/foo) + text = _anchorRefShortcut.Replace(text, new MatchEvaluator(AnchorRefShortcutEvaluator)); + return text; + } + + private string SaveFromAutoLinking(string s) + { + return s.Replace("://", AutoLinkPreventionMarker); + } + + private string AnchorRefEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string linkText = SaveFromAutoLinking(match.Groups[2].Value); + string linkID = match.Groups[3].Value.ToLowerInvariant(); + + string result; + + // for shortcut links like [this][]. + if (linkID == "") + linkID = linkText.ToLowerInvariant(); + + if (_urls.ContainsKey(linkID)) + { + string url = _urls[linkID]; + + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + result = ""; + } + else + result = wholeMatch; + + return result; + } + + private string AnchorRefShortcutEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string linkText = SaveFromAutoLinking(match.Groups[2].Value); + string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " "); // lower case and remove newlines / extra spaces + + string result; + + if (_urls.ContainsKey(linkID)) + { + string url = _urls[linkID]; + + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + result = ""; + } + else + result = wholeMatch; + + return result; + } + + + private string AnchorInlineEvaluator(Match match) + { + string linkText = SaveFromAutoLinking(match.Groups[2].Value); + string url = match.Groups[3].Value; + string title = match.Groups[6].Value; + string result; + + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + if (url.StartsWith("<") && url.EndsWith(">")) + url = url.Substring(1, url.Length - 2); // remove <>'s surrounding URL, if present + + result = string.Format("{0}", linkText); + return result; + } + + private static Regex _imagesRef = new Regex(@" + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + + private static Regex _imagesInline = new Regex(String.Format(@" + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \s? # one optional whitespace character + \( # literal paren + [ ]* + ({0}) # href = $3 + [ ]* + ( # $4 + (['""]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ ]* + )? # title is optional + \) + )", GetNestedParensPattern()), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + + /// + /// Turn Markdown image shortcuts into HTML img tags. + /// + /// + /// ![alt text][id] + /// ![alt text](url "optional title") + /// + private string DoImages(string text) + { + // First, handle reference-style labeled images: ![alt text][id] + text = _imagesRef.Replace(text, new MatchEvaluator(ImageReferenceEvaluator)); + + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + text = _imagesInline.Replace(text, new MatchEvaluator(ImageInlineEvaluator)); + + return text; + } + + // This prevents the creation of horribly broken HTML when some syntax ambiguities + // collide. It likely still doesn't do what the user meant, but at least we're not + // outputting garbage. + private string EscapeImageAltText(string s) + { + s = EscapeBoldItalic(s); + s = Regex.Replace(s, @"[\[\]()]", m => _escapeTable[m.ToString()]); + return s; + } + + private string ImageReferenceEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string altText = match.Groups[2].Value; + string linkID = match.Groups[3].Value.ToLowerInvariant(); + + // for shortcut links like ![this][]. + if (linkID == "") + linkID = altText.ToLowerInvariant(); + + if (_urls.ContainsKey(linkID)) + { + string url = _urls[linkID]; + string title = null; + + if (_titles.ContainsKey(linkID)) + title = _titles[linkID]; + + return ImageTag(url, altText, title); + } + else + { + // If there's no such link ID, leave intact: + return wholeMatch; + } + } + + private string ImageInlineEvaluator(Match match) + { + string alt = match.Groups[2].Value; + string url = match.Groups[3].Value; + string title = match.Groups[6].Value; + + if (url.StartsWith("<") && url.EndsWith(">")) + url = url.Substring(1, url.Length - 2); // Remove <>'s surrounding URL, if present + + return ImageTag(url, alt, title); + } + + private string ImageTag(string url, string altText, string title) + { + altText = EscapeImageAltText(AttributeEncode(altText)); + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + var result = string.Format("\"{1}\"", + /// Turn Markdown headers into HTML header tags + /// + /// + /// Header 1 + /// ======== + /// + /// Header 2 + /// -------- + /// + /// # Header 1 + /// ## Header 2 + /// ## Header 2 with closing hashes ## + /// ... + /// ###### Header 6 + /// + private string DoHeaders(string text) + { + text = _headerSetext.Replace(text, new MatchEvaluator(SetextHeaderEvaluator)); + text = _headerAtx.Replace(text, new MatchEvaluator(AtxHeaderEvaluator)); + return text; + } + + private string SetextHeaderEvaluator(Match match) + { + string header = match.Groups[1].Value; + int level = match.Groups[2].Value.StartsWith("=") ? 1 : 2; + return string.Format("{0}\n\n", RunSpanGamut(header), level); + } + + private string AtxHeaderEvaluator(Match match) + { + string header = match.Groups[2].Value; + int level = match.Groups[1].Value.Length; + return string.Format("{0}\n\n", RunSpanGamut(header), level); + } + + + private static Regex _horizontalRules = new Regex(@" + ^[ ]{0,3} # Leading space + ([-*_]) # $1: First marker + (?> # Repeated marker group + [ ]{0,2} # Zero, one, or two spaces. + \1 # Marker character + ){2,} # Group repeated at least twice + [ ]* # Trailing spaces + $ # End of line. + ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// Turn Markdown horizontal rules into HTML hr tags + /// + /// + /// *** + /// * * * + /// --- + /// - - - + /// + private string DoHorizontalRules(string text) + { + return _horizontalRules.Replace(text, " + /// Turn Markdown lists into HTML ul and ol and li tags + /// + private string DoLists(string text, bool isInsideParagraphlessListItem = false) + { + // We use a different prefix before nested lists than top-level lists. + // See extended comment in _ProcessListItems(). + if (_listLevel > 0) + text = _listNested.Replace(text, GetListEvaluator(isInsideParagraphlessListItem)); + else + text = _listTopLevel.Replace(text, GetListEvaluator(false)); + + return text; + } + + private MatchEvaluator GetListEvaluator(bool isInsideParagraphlessListItem = false) + { + return new MatchEvaluator(match => + { + string list = match.Groups[1].Value; + string listType = Regex.IsMatch(match.Groups[3].Value, _markerUL) ? "ul" : "ol"; + string result; + + result = ProcessListItems(list, listType == "ul" ? _markerUL : _markerOL, isInsideParagraphlessListItem); + + result = string.Format("<{0}>\n{1}\n", listType, result); + return result; + }); + } + + /// + /// Process the contents of a single ordered or unordered list, splitting it + /// into individual list items. + /// + private string ProcessListItems(string list, string marker, bool isInsideParagraphlessListItem = false) + { + // The listLevel global keeps track of when we're inside a list. + // Each time we enter a list, we increment it; when we leave a list, + // we decrement. If it's zero, we're not in a list anymore. + + // We do this because when we're not inside a list, we want to treat + // something like this: + + // I recommend upgrading to version + // 8. Oops, now this line is treated + // as a sub-list. + + // As a single paragraph, despite the fact that the second line starts + // with a digit-period-space sequence. + + // Whereas when we're inside a list (or sub-list), that line will be + // treated as the start of a sub-list. What a kludge, huh? This is + // an aspect of Markdown's syntax that's hard to parse perfectly + // without resorting to mind-reading. Perhaps the solution is to + // change the syntax rules such that sub-lists must start with a + // starting cardinal number; e.g. "1." or "a.". + + _listLevel++; + + // Trim trailing blank lines: + list = Regex.Replace(list, @"\n{2,}\z", "\n"); + + string pattern = string.Format( + @"(^[ ]*) # leading whitespace = $1 + ({0}) [ ]+ # list marker = $2 + ((?s:.+?) # list item text = $3 + (\n+)) + (?= (\z | \1 ({0}) [ ]+))", marker); + + bool lastItemHadADoubleNewline = false; + + // has to be a closure, so subsequent invocations can share the bool + MatchEvaluator ListItemEvaluator = (Match match) => + { + string item = match.Groups[3].Value; + + bool endsWithDoubleNewline = item.EndsWith("\n\n"); + bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n"); + + if (containsDoubleNewline || lastItemHadADoubleNewline) + // we could correct any bad indentation here.. + item = RunBlockGamut(Outdent(item) + "\n", unhash: false); + else + { + // recursion for sub-lists + item = DoLists(Outdent(item), isInsideParagraphlessListItem: true); + item = item.TrimEnd('\n'); + if (!isInsideParagraphlessListItem) // only the outer-most item should run this, otherwise it's run multiple times for the inner ones + item = RunSpanGamut(item); + } + lastItemHadADoubleNewline = endsWithDoubleNewline; + return string.Format("
  • {0}
  • \n", item); + }; + + list = Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline); + _listLevel--; + return list; + } + + private static Regex _codeBlock = new Regex(string.Format(@" + (?:\n\n|\A\n?) + ( # $1 = the code block -- one or more lines, starting with a space + (?: + (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc", + _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// /// Turn Markdown 4-space indented code into HTML pre code blocks + /// + private string DoCodeBlocks(string text) + { + text = _codeBlock.Replace(text, new MatchEvaluator(CodeBlockEvaluator)); + return text; + } + + private string CodeBlockEvaluator(Match match) + { + string codeBlock = match.Groups[1].Value; + + codeBlock = EncodeCode(Outdent(codeBlock)); + codeBlock = _newlinesLeadingTrailing.Replace(codeBlock, ""); + + return string.Concat("\n\n
    ", codeBlock, "\n
    \n\n"); + } + + private static Regex _codeSpan = new Regex(@" + (? + /// Turn Markdown `code spans` into HTML code tags + /// + private string DoCodeSpans(string text) + { + // * You can use multiple backticks as the delimiters if you want to + // include literal backticks in the code span. So, this input: + // + // Just type ``foo `bar` baz`` at the prompt. + // + // Will translate to: + // + //

    Just type foo `bar` baz at the prompt.

    + // + // There's no arbitrary limit to the number of backticks you + // can use as delimters. If you need three consecutive backticks + // in your code, use four for delimiters, etc. + // + // * You can use spaces to get literal backticks at the edges: + // + // ... type `` `bar` `` ... + // + // Turns to: + // + // ... type `bar` ... + // + + return _codeSpan.Replace(text, new MatchEvaluator(CodeSpanEvaluator)); + } + + private string CodeSpanEvaluator(Match match) + { + string span = match.Groups[2].Value; + span = Regex.Replace(span, @"^[ ]*", ""); // leading whitespace + span = Regex.Replace(span, @"[ ]*$", ""); // trailing whitespace + span = EncodeCode(span); + span = SaveFromAutoLinking(span); // to prevent auto-linking. Not necessary in code *blocks*, but in code spans. + + return string.Concat("", span, ""); + } + + + private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + private static Regex _strictBold = new Regex(@"(^|[\W_])(?:(?!\1)|(?=^))(\*|_)\2(?=\S)(.*?\S)\2\2(?!\2)(?=[\W_]|$)", + RegexOptions.Singleline | RegexOptions.Compiled); + + private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1", + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + private static Regex _strictItalic = new Regex(@"(^|[\W_])(?:(?!\1)|(?=^))(\*|_)(?=\S)((?:(?!\2).)*?\S)\2(?!\2)(?=[\W_]|$)", + RegexOptions.Singleline | RegexOptions.Compiled); + + /// + /// Turn Markdown *italics* and **bold** into HTML strong and em tags + /// + private string DoItalicsAndBold(string text) + { + + // must go first, then + if (_strictBoldItalic) + { + text = _strictBold.Replace(text, "$1$3"); + text = _strictItalic.Replace(text, "$1$3"); + } + else + { + text = _bold.Replace(text, "$2"); + text = _italic.Replace(text, "$2"); + } + return text; + } + + /// + /// Turn markdown line breaks (two space at end of line) into HTML break tags + /// + private string DoHardBreaks(string text) + { + if (_autoNewlines) + text = Regex.Replace(text, @"\n", string.Format("[ ]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// Turn Markdown > quoted blocks into HTML blockquote blocks + /// + private string DoBlockQuotes(string text) + { + return _blockquote.Replace(text, new MatchEvaluator(BlockQuoteEvaluator)); + } + + private string BlockQuoteEvaluator(Match match) + { + string bq = match.Groups[1].Value; + + bq = Regex.Replace(bq, @"^[ ]*>[ ]?", "", RegexOptions.Multiline); // trim one level of quoting + bq = Regex.Replace(bq, @"^[ ]+$", "", RegexOptions.Multiline); // trim whitespace-only lines + bq = RunBlockGamut(bq); // recurse + + bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline); + + // These leading spaces screw with
     content, so we need to fix that:
    +            bq = Regex.Replace(bq, @"(\s*
    .+?
    )", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + + bq = string.Format("
    \n{0}\n
    ", bq); + string key = GetHashKey(bq, isHtmlBlock: true); + _htmlBlocks[key] = bq; + + return "\n\n" + key + "\n\n"; + } + + private string BlockQuoteEvaluator2(Match match) + { + return Regex.Replace(match.Groups[1].Value, @"^ ", "", RegexOptions.Multiline); + } + + private const string _charInsideUrl = @"[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;" + "\x1a]"; + private const string _charEndingUrl = "[-A-Z0-9+&@#/%=~_|\\[\\])]"; + + private static Regex _autolinkBare = new Regex(@"(<|="")?\b(https?|ftp)(://" + _charInsideUrl + "*" + _charEndingUrl + ")(?=$|\\W)", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + private static Regex _endCharRegex = new Regex(_charEndingUrl, RegexOptions.IgnoreCase | RegexOptions.Compiled); + + private static string handleTrailingParens(Match match) + { + // The first group is essentially a negative lookbehind -- if there's a < or a =", we don't touch this. + // We're not using a *real* lookbehind, because of links with in links, like + // With a real lookbehind, the full link would never be matched, and thus the http://www.google.com *would* be matched. + // With the simulated lookbehind, the full link *is* matched (just not handled, because of this early return), causing + // the google link to not be matched again. + if (match.Groups[1].Success) + return match.Value; + + var protocol = match.Groups[2].Value; + var link = match.Groups[3].Value; + if (!link.EndsWith(")")) + return "<" + protocol + link + ">"; + var level = 0; + foreach (Match c in Regex.Matches(link, "[()]")) + { + if (c.Value == "(") + { + if (level <= 0) + level = 1; + else + level++; + } + else + { + level--; + } + } + var tail = ""; + if (level < 0) + { + link = Regex.Replace(link, @"\){1," + (-level) + "}$", m => { tail = m.Value; return ""; }); + } + if (tail.Length > 0) + { + var lastChar = link[link.Length - 1]; + if (!_endCharRegex.IsMatch(lastChar.ToString())) + { + tail = lastChar + tail; + link = link.Substring(0, link.Length - 1); + } + } + return "<" + protocol + link + ">" + tail; + } + + /// + /// Turn angle-delimited URLs into HTML anchor tags + /// + /// + /// <http://www.example.com> + /// + private string DoAutoLinks(string text) + { + + if (_autoHyperlink) + { + // fixup arbitrary URLs by adding Markdown < > so they get linked as well + // note that at this point, all other URL in the text are already hyperlinked as + // *except* for the case + text = _autolinkBare.Replace(text, handleTrailingParens); + } + + // Hyperlinks: + text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator)); + + if (_linkEmails) + { + // Email addresses: + string pattern = + @"< + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + >"; + text = Regex.Replace(text, pattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); + } + + return text; + } + + private string HyperlinkEvaluator(Match match) + { + string link = match.Groups[1].Value; + return string.Format("{1}", EscapeBoldItalic(EncodeProblemUrlChars(link)), link); + } + + private string EmailEvaluator(Match match) + { + string email = Unescape(match.Groups[1].Value); + + // + // Input: an email address, e.g. "foo@example.com" + // + // Output: the email address as a mailto link, with each character + // of the address encoded as either a decimal or hex entity, in + // the hopes of foiling most address harvesting spam bots. E.g.: + // + // foo + // @example.com + // + // Based by a filter by Matthew Wickline, posted to the BBEdit-Talk + // mailing list: + // + email = "mailto:" + email; + + // leave ':' alone (to spot mailto: later) + email = EncodeEmailAddress(email); + + email = string.Format("{0}", email); + + // strip the mailto: from the visible part + email = Regex.Replace(email, "\">.+?:", "\">"); + return email; + } + + + private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// Remove one level of line-leading spaces + /// + private string Outdent(string block) + { + return _outDent.Replace(block, ""); + } + + + #region Encoding and Normalization + + + /// + /// encodes email address randomly + /// roughly 10% raw, 45% hex, 45% dec + /// note that @ is always encoded and : never is + /// + private string EncodeEmailAddress(string addr) + { + var sb = new StringBuilder(addr.Length * 5); + var rand = new Random(); + int r; + foreach (char c in addr) + { + r = rand.Next(1, 100); + if ((r > 90 || c == ':') && c != '@') + sb.Append(c); // m + else if (r < 45) + sb.AppendFormat("&#x{0:x};", (int)c); // m + else + sb.AppendFormat("&#{0};", (int)c); // m + } + return sb.ToString(); + } + + private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled); + + /// + /// Encode/escape certain Markdown characters inside code blocks and spans where they are literals + /// + private string EncodeCode(string code) + { + return _codeEncoder.Replace(code, EncodeCodeEvaluator); + } + private string EncodeCodeEvaluator(Match match) + { + switch (match.Value) + { + // Encode all ampersands; HTML entities are not + // entities within a Markdown code span. + case "&": + return "&"; + // Do the angle bracket song and dance + case "<": + return "<"; + case ">": + return ">"; + // escape characters that are magic in Markdown + default: + return _escapeTable[match.Value]; + } + } + + + private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled); + private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled); + + /// + /// Encode any ampersands (that aren't part of an HTML entity) and left or right angle brackets + /// + private string EncodeAmpsAndAngles(string s) + { + s = _amps.Replace(s, "&"); + s = _angles.Replace(s, "<"); + return s; + } + + private static Regex _backslashEscapes; + + /// + /// Encodes any escaped characters such as \`, \*, \[ etc + /// + private string EscapeBackslashes(string s) + { + return _backslashEscapes.Replace(s, new MatchEvaluator(EscapeBackslashesEvaluator)); + } + private string EscapeBackslashesEvaluator(Match match) + { + return _backslashEscapeTable[match.Value]; + } + + private static Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled); + + /// + /// swap back in all the special characters we've hidden + /// + private string Unescape(string s) + { + return _unescapes.Replace(s, new MatchEvaluator(UnescapeEvaluator)); + } + private string UnescapeEvaluator(Match match) + { + return _invertedEscapeTable[match.Value]; + } + + + /// + /// escapes Bold [ * ] and Italic [ _ ] characters + /// + private string EscapeBoldItalic(string s) + { + s = s.Replace("*", _escapeTable["*"]); + s = s.Replace("_", _escapeTable["_"]); + return s; + } + + private static string AttributeEncode(string s) + { + return s.Replace(">", ">").Replace("<", "<").Replace("\"", """); + } + + private static readonly char[] _problemUrlChars = @"""'*()[]$:".ToCharArray(); + + /// + /// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems + /// + private string EncodeProblemUrlChars(string url) + { + if (!_encodeProblemUrlCharacters) return url; + + var sb = new StringBuilder(url.Length); + bool encode; + char c; + + for (int i = 0; i < url.Length; i++) + { + c = url[i]; + encode = Array.IndexOf(_problemUrlChars, c) != -1; + if (encode && c == ':' && i < url.Length - 1) + encode = !(url[i + 1] == '/') && !(url[i + 1] >= '0' && url[i + 1] <= '9'); + + if (encode) + sb.Append("%" + String.Format("{0:x}", (byte)c)); + else + sb.Append(c); + } + + return sb.ToString(); + } + + + /// + /// Within tags -- meaning between < and > -- encode [\ ` * _] so they + /// don't conflict with their use in Markdown for code, italics and strong. + /// We're replacing each such character with its corresponding hash + /// value; this is likely overkill, but it should prevent us from colliding + /// with the escape values by accident. + /// + private string EscapeSpecialCharsWithinTagAttributes(string text) + { + var tokens = TokenizeHTML(text); + + // now, rebuild text from the tokens + var sb = new StringBuilder(text.Length); + + foreach (var token in tokens) + { + string value = token.Value; + + if (token.Type == TokenType.Tag) + { + value = value.Replace(@"\", _escapeTable[@"\"]); + + if (_autoHyperlink && value.StartsWith("(?=.)", _escapeTable[@"`"]); + value = EscapeBoldItalic(value); + } + + sb.Append(value); + } + + return sb.ToString(); + } + + /// + /// convert all tabs to _tabWidth spaces; + /// standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF); + /// makes sure text ends with a couple of newlines; + /// removes any blank lines (only spaces) in the text + /// + private string Normalize(string text) + { + var output = new StringBuilder(text.Length); + var line = new StringBuilder(); + bool valid = false; + + for (int i = 0; i < text.Length; i++) + { + switch (text[i]) + { + case '\n': + if (valid) output.Append(line); + output.Append('\n'); + line.Length = 0; valid = false; + break; + case '\r': + if ((i < text.Length - 1) && (text[i + 1] != '\n')) + { + if (valid) output.Append(line); + output.Append('\n'); + line.Length = 0; valid = false; + } + break; + case '\t': + int width = (_tabWidth - line.Length % _tabWidth); + for (int k = 0; k < width; k++) + line.Append(' '); + break; + case '\x1A': + break; + default: + if (!valid && text[i] != ' ') valid = true; + line.Append(text[i]); + break; + } + } + + if (valid) output.Append(line); + output.Append('\n'); + + // add two newlines to the end before return + return output.Append("\n\n").ToString(); + } + + #endregion + + /// + /// this is to emulate what's evailable in PHP + /// + private static string RepeatString(string text, int count) + { + var sb = new StringBuilder(text.Length * count); + for (int i = 0; i < count; i++) + sb.Append(text); + return sb.ToString(); + } + + } +} \ No newline at end of file diff --git a/Katteker.Gui/Properties/AssemblyInfo.cs b/Katteker.Gui/Properties/AssemblyInfo.cs index 6c30bf7..d2ccb21 100644 --- a/Katteker.Gui/Properties/AssemblyInfo.cs +++ b/Katteker.Gui/Properties/AssemblyInfo.cs @@ -31,6 +31,5 @@ using System.Runtime.InteropServices; // Sie können alle Werte angeben oder die standardmäßigen Build- und Revisionsnummern // übernehmen, indem Sie "*" eingeben: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] -[assembly: AssemblyInformationalVersion("1.0.0")] +[assembly: AssemblyVersion("1.0.0")] +[assembly: AssemblyFileVersion("1.0.0")] diff --git a/Katteker.Gui/packages.config b/Katteker.Gui/packages.config deleted file mode 100644 index 8f5839b..0000000 --- a/Katteker.Gui/packages.config +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/Katteker/Properties/AssemblyInfo.cs b/Katteker/Properties/AssemblyInfo.cs index 9d91de6..d98e725 100644 --- a/Katteker/Properties/AssemblyInfo.cs +++ b/Katteker/Properties/AssemblyInfo.cs @@ -31,5 +31,5 @@ using System.Runtime.InteropServices; // Sie können alle Werte angeben oder Standardwerte für die Build- und Revisionsnummern verwenden, // indem Sie "*" wie unten gezeigt eingeben: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] +[assembly: AssemblyVersion("1.0.0")] +[assembly: AssemblyFileVersion("1.0.0")]