3次元を取り扱うDBSCANプログラム

2024年2月11日
/*
	c:\users\ebata\dscan\dscan3.go

	DBSCANアルゴリズムを実装し、3次元空間のデータ（x、y、t）をクラスタリングするシンプルなプログラムです。
	このプログラムでは、各データポイントは3次元座標（x、y、t）で表され、距離の計算にはユークリッド距離を使用します。

	DBSCANをk-meansの違いで説明します。

	DBSCAN（Density-Based Spatial Clustering of Applications with Noise）とk-meansは、クラスタリングアルゴリズムですが、そのアプローチや動作原理にはいくつかの違いがあります。

	(1)クラスタリング方法:

	- DBSCAN: 密度ベースのクラスタリングアルゴリズムであり、データポイントの密度に基づいてクラスタを形成します。各点は、一定の距離（ε、epsilon）内に最小限の近傍点数（minPts）が存在する場合、その点を中心としたクラスタが形成されます。
	- k-means: 距離ベースのクラスタリングアルゴリズムであり、データポイントの距離に基づいてクラスタを形成します。クラスタの数（k）を事前に指定し、各点を最も近いセントロイド（クラスタの中心）に割り当てます。

	(2)クラスタの形状:

	- DBSCAN: クラスタの形状は任意であり、密度の高い領域に基づいて形成されます。したがって、DBSCANは非凸形状のクラスタを処理できます。
	- k-means: クラスタの形状は円形（球形）であり、各クラスタのセントロイドからの距離に基づいて決定されます。したがって、k-meansは凸形状のクラスタを前提としています。

	(3)ハイパーパラメータ:

	- DBSCAN: ε（epsilon）とminPtsの2つのハイパーパラメータを必要とします。εは近傍点の距離の閾値を定義し、minPtsはクラスタと見なすための最小の近傍点数を指定します
	- k-means: クラスタの数（k）を指定する必要があります。

	(4)ノイズの処理:

	- DBSCAN: ノイズポイントを自動的に検出し、外れ値として扱います。密度が低い領域に存在するポイントは、任意のクラスタに割り当てられず、ノイズとして扱われます。
	-k-means: 外れ値やノイズの処理を明示的に行いません。各点は必ずどれかのクラスタに割り当てられます。

	 要するに、DBSCANは密度ベースのアルゴリズムであり、任意の形状のクラスタを検出し、ノイズを処理する能力があります。一方、k-meansは距離ベースのアルゴリズムであり、クラスタの形状が円形であることを前提としています。


*/

package main

import (
	"fmt"
	"math"
)

// Point represents a 3D point with coordinates x, y, and t
type Point struct {
	X, Y, T float64
}

// DistanceTo calculates the Euclidean distance between two 3D points
func (p Point) DistanceTo(other Point) float64 {
	dx := p.X - other.X
	dy := p.Y - other.Y
	dt := p.T - other.T
	return math.Sqrt(dx*dx + dy*dy + dt*dt)
}

// Cluster represents a cluster of points
type Cluster struct {
	Points []Point
}

// DBSCAN performs density-based clustering of 3D points
func DBSCAN(points []Point, epsilon float64, minPts int) []Cluster {
	var clusters []Cluster
	var visited = make(map[Point]bool)

	for _, point := range points {
		if visited[point] {
			continue
		}
		visited[point] = true

		neighbours := getNeighbours(points, point, epsilon)
		if len(neighbours) < minPts {
			continue
		}

		var clusterPoints []Point
		expandCluster(&clusterPoints, points, visited, point, neighbours, epsilon, minPts)
		clusters = append(clusters, Cluster{Points: clusterPoints})
	}

	return clusters
}

// getNeighbours returns all points within distance epsilon of the given point
func getNeighbours(points []Point, point Point, epsilon float64) []Point {
	var neighbours []Point
	for _, other := range points {
		if point.DistanceTo(other) <= epsilon {
			neighbours = append(neighbours, other)
		}
	}
	return neighbours
}

// expandCluster expands the cluster from the given point
func expandCluster(cluster *[]Point, points []Point, visited map[Point]bool, point Point, neighbours []Point, epsilon float64, minPts int) {
	*cluster = append(*cluster, point)
	for _, neighbour := range neighbours {
		if !visited[neighbour] {
			visited[neighbour] = true
			neighbourNeighbours := getNeighbours(points, neighbour, epsilon)
			if len(neighbourNeighbours) >= minPts {
				expandCluster(cluster, points, visited, neighbour, neighbourNeighbours, epsilon, minPts)
			}
		}
		// Add neighbour to cluster if not already in another cluster
		var isInCluster bool
		for _, c := range *cluster {
			if c == neighbour {
				isInCluster = true
				break
			}
		}
		if !isInCluster {
			*cluster = append(*cluster, neighbour)
		}
	}
}

func main() {
	// Example usage
	points := []Point{
		{X: 1, Y: 2, T: 0},
		{X: 1.5, Y: 1.8, T: 1},
		{X: 5, Y: 8, T: 2},
		{X: 8, Y: 8, T: 3},
		{X: 1, Y: 0.6, T: 4},
		{X: 9, Y: 11, T: 5},
		{X: 8, Y: 2, T: 6},
		{X: 10, Y: 2, T: 7},
		{X: 9, Y: 3, T: 8},
	}

	epsilon := 3.0
	minPts := 2

	clusters := DBSCAN(points, epsilon, minPts)
	for i, cluster := range clusters {
		fmt.Printf("Cluster %d:\n", i+1)
		for _, point := range cluster.Points {
			fmt.Printf("  (%.2f, %.2f, %.2f)\n", point.X, point.Y, point.T)
		}
	}
}
2024年2月11日2024,江端さんの技術メモ
Posted by ebata