using ScottPlot;
using System.Diagnostics;
using System.Drawing;
using System.Globalization;

namespace kMeans;

/// <summary>
/// Console entry point: reads 2D points from a CSV file, clusters them with
/// k-means and renders the result to a PNG that is then opened for preview.
/// </summary>
public static class Program
{
    private const string Coordinates = "coordinates.csv";
    private const int K = 3;
    private const string PlotOut = "plot.png";

    // Safety net so a pathological data set cannot make the loop run forever.
    private const int MaxIterations = 100;

    // A centroid that moves less than this on both axes counts as unchanged.
    private const double ConvergenceTolerance = 1e-9;

    /// <summary>
    /// Runs the whole pipeline: parse, initialise centroids, iterate
    /// assignment/update until no centroid moves, then plot and preview.
    /// </summary>
    public static void Main()
    {
        Console.WriteLine("k-Means-Algorithmus");
        var points = ParseCsv(Coordinates).ToList();
        if (points.Count == 0)
        {
            Console.Error.WriteLine($"No valid points found in '{Coordinates}'.");
            return;
        }

        var centroids = InitializeRandomlyCentroids(points, K).ToList();

        var moved = true;
        for (var iteration = 0; moved && iteration < MaxIterations; iteration++)
        {
            AssignPointsToCentroids(points, centroids);
            moved = UpdateCentroids(points, centroids);
        }

        if (moved)
        {
            // The iteration cap was hit right after centroids moved; refresh
            // the assignments so the plot matches the final centroid positions.
            AssignPointsToCentroids(points, centroids);
        }

        var plot = CreatePlot(points, centroids);
        ExportAndPreviewPlot(plot);
    }

    /// <summary>
    /// Moves every centroid to the arithmetic mean of the points currently
    /// assigned to its cluster. Centroids without any assigned point are left
    /// where they are.
    /// </summary>
    /// <param name="points">Data points whose <see cref="Point.ClusterId"/> has been assigned.</param>
    /// <param name="centroids">Centroids to update in place.</param>
    /// <returns>
    /// <c>true</c> if at least one centroid moved by more than
    /// <see cref="ConvergenceTolerance"/> on either axis; otherwise <c>false</c>.
    /// </returns>
    private static bool UpdateCentroids(List<Point> points, List<Point> centroids)
    {
        // Group once instead of filtering the full point list per centroid.
        var clusters = points.ToLookup(p => p.ClusterId);
        var moved = false;

        foreach (var centroid in centroids)
        {
            var members = clusters[centroid.ClusterId];
            if (!members.Any())
            {
                continue;
            }

            var meanX = members.Average(p => p.X);
            var meanY = members.Average(p => p.Y);

            if (Math.Abs(meanX - centroid.X) > ConvergenceTolerance ||
                Math.Abs(meanY - centroid.Y) > ConvergenceTolerance)
            {
                moved = true;
            }

            centroid.X = meanX;
            centroid.Y = meanY;
        }

        return moved;
    }

    /// <summary>
    /// Assigns every point the <see cref="Point.ClusterId"/> of its nearest
    /// centroid (Euclidean distance). On a tie the first centroid in list
    /// order wins. Does nothing when there are no centroids.
    /// </summary>
    /// <param name="points">Points to label in place.</param>
    /// <param name="centroids">Current centroids.</param>
    private static void AssignPointsToCentroids(List<Point> points, List<Point> centroids)
    {
        if (centroids.Count == 0)
        {
            return;
        }

        foreach (var point in points)
        {
            var nearest = centroids[0];
            var nearestDistance = double.PositiveInfinity;

            foreach (var centroid in centroids)
            {
                var dx = point.X - centroid.X;
                var dy = point.Y - centroid.Y;

                // Squared distance preserves ordering, so the sqrt is skipped.
                var distance = dx * dx + dy * dy;
                if (distance < nearestDistance)
                {
                    nearestDistance = distance;
                    nearest = centroid;
                }
            }

            point.ClusterId = nearest.ClusterId;
        }
    }

    /// <summary>
    /// Builds a scatter plot with one colour per cluster and draws each
    /// centroid as a cross in the colour of its cluster.
    /// </summary>
    /// <param name="points">Data points, grouped by <see cref="Point.ClusterId"/> for colouring.</param>
    /// <param name="centroids">Centroids to overlay.</param>
    /// <returns>The populated plot.</returns>
    private static Plot CreatePlot(IEnumerable<Point> points, IEnumerable<Point> centroids)
    {
        var sw = Stopwatch.StartNew();
        var plot = new Plot();
        var colors = new Dictionary<int, Color>();

        foreach (var clusterGroup in points.GroupBy(p => p.ClusterId))
        {
            var color = plot.GetNextColor();
            colors.Add(clusterGroup.Key, color);
            var xs = clusterGroup.Select(p => p.X).ToArray();
            var ys = clusterGroup.Select(p => p.Y).ToArray();
            plot.AddScatterPoints(xs, ys, color);
        }

        const MarkerShape marker = MarkerShape.cross;
        const float size = 10f;
        foreach (var centroid in centroids)
        {
            // A centroid whose cluster has no points has no colour yet.
            var color = colors.TryGetValue(centroid.ClusterId, out var c) ? c : plot.GetNextColor();
            plot.AddScatterPoints(new[] { centroid.X }, new[] { centroid.Y }, color, size, marker);
        }

        sw.Stop();
        Console.WriteLine($"[{nameof(CreatePlot)}] Elapsed: {sw.ElapsedMilliseconds}ms");
        return plot;
    }

    /// <summary>
    /// Saves the plot to <see cref="PlotOut"/> and opens the file through the
    /// operating system shell.
    /// </summary>
    /// <param name="plot">Plot to export.</param>
    private static void ExportAndPreviewPlot(Plot plot)
    {
        var sw = Stopwatch.StartNew();
        plot.SaveFig(PlotOut);

        // Disposing releases only our handle; the launched viewer keeps running.
        using var viewer = Process.Start(new ProcessStartInfo(PlotOut) { UseShellExecute = true });

        sw.Stop();
        Console.WriteLine($"[{nameof(ExportAndPreviewPlot)}] Elapsed: {sw.ElapsedMilliseconds}ms");
    }

    /// <summary>
    /// Creates <paramref name="k"/> centroids at uniformly random positions
    /// inside the axis-aligned bounding box of <paramref name="points"/>.
    /// Centroids get cluster ids <c>0 .. k-1</c>.
    /// </summary>
    /// <param name="points">Non-empty set of data points.</param>
    /// <param name="k">Number of centroids to create.</param>
    /// <returns>A lazily generated sequence of centroids.</returns>
    /// <exception cref="ArgumentException"><paramref name="points"/> is empty.</exception>
    private static IEnumerable<Point> InitializeRandomlyCentroids(IReadOnlyCollection<Point> points, int k)
    {
        // Validate eagerly; an iterator body would defer this to first enumeration.
        if (points.Count == 0)
        {
            throw new ArgumentException("At least one point is required to place centroids.", nameof(points));
        }

        var minX = points.Min(p => p.X);
        var maxX = points.Max(p => p.X);
        var minY = points.Min(p => p.Y);
        var maxY = points.Max(p => p.Y);

        return Generate();

        IEnumerable<Point> Generate()
        {
            for (var i = 0; i < k; i++)
            {
                // min + (max - min) * r maps r in [0, 1) onto [min, max).
                var x = minX + (maxX - minX) * Random.Shared.NextDouble();
                var y = minY + (maxY - minY) * Random.Shared.NextDouble();
                yield return new Point(x, y, true, i);
            }
        }
    }

    /// <summary>
    /// Lazily reads "x,y" pairs from a CSV file. Lines with fewer than two
    /// columns, or whose first two columns are not finite invariant-culture
    /// numbers (e.g. a header row), are skipped.
    /// </summary>
    /// <param name="path">Path of the CSV file.</param>
    /// <returns>Unassigned points (<see cref="Point.ClusterId"/> of -1).</returns>
    private static IEnumerable<Point> ParseCsv(string path)
    {
        foreach (var line in File.ReadLines(path))
        {
            var current = line.Split(',');
            if (current.Length < 2)
            {
                continue;
            }

            if (!double.TryParse(current[0], NumberStyles.Any, CultureInfo.InvariantCulture, out var x))
            {
                continue;
            }

            if (!double.TryParse(current[1], NumberStyles.Any, CultureInfo.InvariantCulture, out var y))
            {
                continue;
            }

            // TryParse accepts "NaN"/"Infinity"; those would poison min/max and means.
            if (!double.IsFinite(x) || !double.IsFinite(y))
            {
                continue;
            }

            yield return new Point(x, y, false, -1);
        }
    }
}

/// <summary>
/// A mutable 2D point used both for data points and for cluster centroids.
/// </summary>
public class Point
{
    /// <summary>Creates a point.</summary>
    /// <param name="x">X coordinate.</param>
    /// <param name="y">Y coordinate.</param>
    /// <param name="focusPoint"><c>true</c> for centroids, <c>false</c> for data points.</param>
    /// <param name="clusterId">Cluster id; data points start at -1 (unassigned).</param>
    public Point(double x, double y, bool focusPoint, int clusterId)
    {
        X = x;
        Y = y;
        FocusPoint = focusPoint;
        ClusterId = clusterId;
    }

    /// <summary>Id of the cluster this point belongs to, or that this centroid represents.</summary>
    public int ClusterId { get; set; }

    /// <summary>Whether this point is a centroid rather than a data point.</summary>
    public bool FocusPoint { get; set; }

    /// <summary>X coordinate.</summary>
    public double X { get; set; }

    /// <summary>Y coordinate.</summary>
    public double Y { get; set; }
}