diff --git a/src/Helper.cs b/src/Helper.cs
new file mode 100644
index 0000000..8e02c85
--- /dev/null
+++ b/src/Helper.cs
@@ -0,0 +1,68 @@
+using System.Diagnostics;
+using System.Drawing;
+using System.Globalization;
+using ScottPlot;
+
+namespace kMeans;
+
+/// <summary>Plotting and CSV-input helpers for the k-means demo.</summary>
+public static class Helper
+{
+    /// <summary>
+    /// Builds a scatter plot with one colour per cluster and a cross marker per centroid.
+    /// </summary>
+    /// <param name="points">Data points; they are grouped by <c>ClusterId</c>.</param>
+    /// <param name="centroids">Centroids; each is drawn in its cluster's colour and labelled <c>ClusterId: n</c>.</param>
+    /// <returns>The populated plot.</returns>
+    public static Plot CreatePlot(IEnumerable<Point> points, IEnumerable<Point> centroids)
+    {
+        var plot = new Plot();
+        plot.Legend(true, Alignment.UpperRight);
+        var colors = new Dictionary<int, Color>();
+        var colorGroups = points.GroupBy(x => x.ClusterId);
+        foreach (var clusterGroup in colorGroups)
+        {
+            var color = plot.GetNextColor();
+            colors.Add(clusterGroup.Key, color);
+            var xs = clusterGroup.Select(p => p.X).ToArray();
+            var ys = clusterGroup.Select(p => p.Y).ToArray();
+            plot.AddScatterPoints(xs, ys, color);
+        }
+
+        const MarkerShape marker = MarkerShape.cross;
+        const float size = 10f;
+        foreach (var (x, y, clusterId) in centroids)
+        {
+            // A centroid whose cluster holds no points has no colour yet; take the next palette colour.
+            var color = colors.TryGetValue(clusterId, out var c) ? c : plot.GetNextColor();
+            plot.AddScatterPoints(new[] { x }, new[] { y }, color, size, marker, $"ClusterId: {clusterId}");
+        }
+
+        return plot;
+    }
+
+    /// <summary>Saves the plot to <paramref name="path"/> and opens it via the OS shell.</summary>
+    /// <remarks>Blocks until the started process exits, when the shell returns a process handle.</remarks>
+    public static void ExportAndPreviewPlot(Plot plot, string path)
+    {
+        plot.SaveFig(path);
+        using var p = Process.Start(new ProcessStartInfo(path) { UseShellExecute = true });
+        p?.WaitForExit();
+    }
+
+    /// <summary>Lazily reads two-column comma-separated coordinates from a file.</summary>
+    /// <remarks>Lines with fewer than two fields or with non-numeric values are skipped.</remarks>
+    /// <returns>One unassigned point (<c>ClusterId</c> -1) per valid line.</returns>
+    public static IEnumerable<Point> ParseCsv(string path)
+    {
+        var lines = File.ReadLines(path);
+        foreach (var line in lines)
+        {
+            var current = line.Split(',');
+            if (current.Length < 2) continue; // blank or single-column line: indexing current[1] would throw
+            if (!double.TryParse(current[0], NumberStyles.Any, CultureInfo.InvariantCulture, out var x)) continue;
+            if (!double.TryParse(current[1], NumberStyles.Any, CultureInfo.InvariantCulture, out var y)) continue;
+            yield return new Point(x, y, -1);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Point.cs b/src/Point.cs
new file mode 100644
index 0000000..6bf034e
--- /dev/null
+++ b/src/Point.cs
@@ -0,0 +1,10 @@
+namespace kMeans;
+
+/// <summary>A 2D point (data point or centroid) tagged with the id of its cluster.</summary>
+public record Point(double X, double Y, int ClusterId)
+{
+    // Properties are redeclared with setters because cluster assignment and centroid updates mutate points in place.
+    public int ClusterId { get; set; } = ClusterId;
+    public double X { get; set; } = X;
+    public double Y { get; set; } = Y;
+}
\ No newline at end of file
diff --git a/src/Program.cs b/src/Program.cs
index c0d4250..2538051 100644
--- a/src/Program.cs
+++ b/src/Program.cs
@@ -1,7 +1,4 @@
-using ScottPlot;
-using System.Diagnostics;
-using System.Drawing;
-using System.Globalization;
+using static kMeans.Helper;
 
 namespace kMeans;
 
@@ -13,78 +10,44 @@ public static class Program
 
     public static void Main()
     {
-        Console.WriteLine("k-Means-Algorithmus");
+        Console.WriteLine("k-Means-Algorithm");
         var points = ParseCsv(Coordinates).ToList();
-        var centroids = InitializeRandomlyCentroids(points, K).ToList();
-        AssignPointsToCentroids(points, centroids);
-        UpdateCentroids(points, centroids);
-        var plot = CreatePlot(points, centroids);
-        ExportAndPreviewPlot(plot);
-    }
 
-    private static bool UpdateCentroids(List<Point> points, List<Point> centroids)
-    {
-        // calculate mean of all points of one cluster.
-        foreach (var centroid in centroids)
+        var centroids = InitializeRandomlyCentroids(points, K).ToList();
+        var update = true;
+        while (update)
         {
-            var clusterPoints = points.Where(x => x.ClusterId == centroid.ClusterId);
-            double newMeanX = 0;
-            double newMeanY = 0;
-            foreach (var cluster in clusterPoints)
-            {
-                centroid.X = 3;
-            }
+            AssignPointsToCentroids(points, centroids);
+            update = UpdateCentroids(points, centroids);
         }
 
-        return false;
+        var plot = CreatePlot(points, centroids);
+        ExportAndPreviewPlot(plot, PlotOut);
     }
 
     private static void AssignPointsToCentroids(List<Point> points, List<Point> centroids)
     {
         foreach (var point in points)
         {
+            var id = 0;
+            var distance = double.MaxValue;
             foreach (var centroid in centroids)
             {
                 // calculate euclid distance and assign id.
+                var currentDistance = Distance(centroid, point);
+                if (currentDistance > distance) continue;
+                distance = currentDistance;
+                id = centroid.ClusterId;
             }
+
+            point.ClusterId = id;
         }
     }
 
-    private static Plot CreatePlot(IEnumerable<Point> points, IEnumerable<Point> centroids)
+    /// <summary>Euclidean distance between <paramref name="centroid"/> and <paramref name="point"/>.</summary>
+    private static double Distance(Point centroid, Point point)
     {
-        var sw = Stopwatch.StartNew();
-        var plot = new Plot();
-        var colors = new Dictionary<int, Color>();
-        var colorGroups = points.GroupBy(x => x.ClusterId);
-        foreach (var clusterGroup in colorGroups)
-        {
-            var color = plot.GetNextColor();
-            colors.Add(clusterGroup.Key, color);
-            var xs = clusterGroup.Select(p => p.X).ToArray();
-            var ys = clusterGroup.Select(p => p.Y).ToArray();
-            plot.AddScatterPoints(xs, ys, color);
-        }
-
-        const MarkerShape marker = MarkerShape.cross;
-        const float size = 10f;
-        foreach (var centroid in centroids)
-        {
-            var color = colors.TryGetValue(centroid.ClusterId, out var c) ? c : plot.GetNextColor();
-            plot.AddScatterPoints(new[] { centroid.X }, new[] { centroid.Y }, color, size, marker);
-        }
-
-        sw.Stop();
-        Console.WriteLine($"[{nameof(CreatePlot)}] Elapsed: {sw.ElapsedMilliseconds}ms");
-        return plot;
-    }
-
-    private static void ExportAndPreviewPlot(Plot plot)
-    {
-        var sw = Stopwatch.StartNew();
-        plot.SaveFig(PlotOut);
-        Process.Start(new ProcessStartInfo(PlotOut) { UseShellExecute = true });
-        sw.Stop();
-        Console.WriteLine($"[{nameof(ExportAndPreviewPlot)}] Elapsed: {sw.ElapsedMilliseconds}ms");
+        return Math.Sqrt(Math.Pow(centroid.X - point.X, 2) + Math.Pow(centroid.Y - point.Y, 2));
     }
 
     private static IEnumerable<Point> InitializeRandomlyCentroids(IReadOnlyCollection<Point> points, int k)
@@ -99,35 +62,43 @@ public static class Program
         {
             var x = (minX + maxX) * rnd.NextDouble();
             var y = (minY + maxY) * rnd.NextDouble();
-            yield return new Point(x, y, true, i);
+            var point = new Point(x, y, i);
+            Console.WriteLine(point);
+            yield return point;
         }
     }
 
-    private static IEnumerable<Point> ParseCsv(string path)
+    /// <summary>Recomputes every centroid from the points currently assigned to it.</summary>
+    /// <returns><c>true</c> if at least one centroid moved.</returns>
+    private static bool UpdateCentroids(IReadOnlyCollection<Point> points, List<Point> centroids)
     {
-        var lines = File.ReadLines(path);
-        foreach (var line in lines)
+        // calculate mean of all points of one cluster.
+        var updated = false;
+        foreach (var centroid in centroids)
         {
-            var current = line.Split(',');
-            if (!double.TryParse(current[0], NumberStyles.Any, CultureInfo.InvariantCulture, out var x)) continue;
-            if (!double.TryParse(current[1], NumberStyles.Any, CultureInfo.InvariantCulture, out var y)) continue;
-            yield return new Point(x, y, false, -1);
+            updated |= UpdateClusterCentroid(centroid, points);
         }
-    }
-}
 
-public class Point
-{
-    public Point(double x, double y, bool focusPoint, int clusterId)
+        return updated;
+    }
+
+    /// <summary>Moves <paramref name="centroid"/> to the mean of the points assigned to its cluster.</summary>
+    /// <returns><c>true</c> if the centroid moved by more than 1e-5 on at least one axis.</returns>
+    private static bool UpdateClusterCentroid(Point centroid, IEnumerable<Point> points)
     {
-        X = x;
-        Y = y;
-        FocusPoint = focusPoint;
-        ClusterId = clusterId;
-    }
+        var pointsOfCluster = points.Where(p => p.ClusterId == centroid.ClusterId).ToArray();
+        // An empty cluster has no mean (0.0 / 0 is NaN); leave the centroid where it is.
+        if (pointsOfCluster.Length == 0) return false;
+        var sumX = pointsOfCluster.Sum(p => p.X);
+        var meanX = sumX / pointsOfCluster.Length;
+        var sumY = pointsOfCluster.Sum(p => p.Y);
+        var meanY = sumY / pointsOfCluster.Length;
 
-    public int ClusterId { get; set; }
-    public bool FocusPoint { get; set; }
-    public double X { get; set; }
-    public double Y { get; set; }
+        // A shift on EITHER axis counts as movement; '>' keeps NaN inputs from ever reporting a change.
+        var moved = Math.Abs(centroid.X - meanX) > 1e-5 || Math.Abs(centroid.Y - meanY) > 1e-5;
+        if (!moved) return false;
+        centroid.X = meanX;
+        centroid.Y = meanY;
+        return true;
+    }
 }
\ No newline at end of file