Pure TypeScript implementations of common clustering algorithms.
1. Interface | 2. Algorithms | 3. Utilities
import * as clA from "t-ski/clustering-algorithms"type TVector = number[]
type TMatrix[] = number[][]
type TCluster = TVector[]
interface AClustering<O = TCluster> {
clusters: O[]
}clA.<algorithm>(data: I[], ...args): AClustering<I>clA.<algorithm>(data: number[]|TVector[], k: number = 2): AClusteringnew clA.KMeans(DATA, 3).clusters;O(n²) | Θ(n)
clA.KMeans(data, k?)O(n²) | Θ(n)
clA.KMeansPP(data, k?)O(n²) | Θ(n)
clA.KMedoids(data, k?)clA.<algorithm>(data: number[]|TVector[], epsilon?: number): AClusteringIf not defined, the epsilon parameter is estimated from the given data.
new clA.MeanShift(DATA, 2).clusters;O = Θ(n²)
clA.MeanShift(data, epsilon?): AClustering & { noise: TCluster }O(n²) | Θ(n log n)
clA.DBSCAN(data, epsilon?, minPoints: number = 4): AClustering & { noise: TCluster }O(n²) | Θ(n log n)
clA.OPTICS(data, epsilon?, minPoints: number = 4): AClustering & { noise: TCluster }clA.<algorithm>(adjacencyMatrix: TMatrix, k: number = 2): AClustering<number[]>Input to graph-based clustering is a target graph's adjacency matrix. Output corresponds to clusters of related graph node indexes.
new clA.ConnectedComponents(DATA).clusters;O = Θ(n³)
clA.ConnectedComponents(adjacencyMatrix)O = Θ(n³)
clA.Divisive(adjacencyMatrix, k: number = 2)O = Θ(n³)
clA.Markov(adjacencyMatrix, e: number = 2, r: number = 2)clA.<algorithm>(data: number[]|TVector[], k: number = 2): AClusteringnew clA.AverageLinkage(DATA, 4).clusters;O = Θ(n³)
clA.AverageLinkage(data, k?)O = Θ(n³)
clA.CentroidLinkage(data, k?)O = Θ(n³)
clA.CompleteLinkage(data, k?)O = Θ(n³)
clA.HausdorffLinkage(data, k?)O = Θ(n³)
clA.MedianLinkage(data, k?)O = Θ(n³)
clA.SingleLinkage(data, k?)O = Θ(n³)
clA.Ward(data, k?)f: (ℝ×…×ℝ)² ↦ ℕ
clA.util.distance.<metric>(p1: TVector, p2: TVector = []): numberclA.AClustering.setDistanceMetric(util.distance.manhattan);
const clusters = new clA.KMeans(DATA, 3).clusters;clA.util.distance.euclidean(p1: TVector, p2: TVector = []): numberclA.util.distance.manhattan(p1: TVector, p2: TVector = []): numberclA.util.distance.chebyshev(p1: TVector, p2: TVector = []): numberclA.util.distance.cosine(p1: TVector, p2: TVector = []): numberclA.util.quality.<metric>(clusters: TCluster[]): numberconst clusters = new clA.KMeans(DATA, 3).clusters;
const clusteringQuality = clA.util.quality.silhouetteCoefficient(clusters);f: ℕ×(ℝ×…×ℝ) ↦ [-1,1] → 1
clA.util.quality.silhouetteCoefficient(clusters: TCluster[]): numberf: ℕ×(ℝ×…×ℝ) ↦ [0,∞) → ∞
clA.util.quality.dunnIndex(clusters: TCluster[]): numberf: ℕ×(ℝ×…×ℝ) ↦ [0,∞) → 0
clA.util.quality.daviesBouldinIndex(clusters: TCluster[]): numberThe VectorDataMap utility class helps with associating arbitrary entites with data points (view example).
type TDataPoint<T> = [ TVector, T ]
new clA.util.VectorDataMap<T>(data: TDataPoint<T>[]): {
vectors: TVector[];
getEntity: (vector: TVector) => T|T[];
getVector: (entity: T) => TVector;
getCluster: (clusters: TCluster[], vector: TVector) => number|number[];
getCluster: (clusters: TCluster[], entity: T) => number|number[];
}© Thassilo Martin Schiepanski

