密度聚类js实现(备忘)

从别人那里学习来的,暂时只有代码,我没尝试具体实现

1、DBSCAN的js实现

//// test data with latitude and longitude
//var X = [
//    [10.0001, 10.0001], [10.0002, 10.0002], [10.0003, 10.0003], [10.0004, 10.0004],
//    [20.0001, 20.0001], [20.0002, 20.0002], [20.0003, 20.0003], [20.0004, 20.0004],
//    [30.0001, 30.0001], [30.0002, 30.0002], [30.0003, 30.0003], [30.0004, 30.0004],
//    [40.0001, 40.0001], [40.0002, 40.0002], [40.0003, 40.0003], [40.0004, 40.0004],
//    [70, 70], 
//    [80, 80]
//];
//var eps = 100;
//var MinPts = 4;

// spatial distance
function sp_dist(a, b) {
    var lat1 = a[0], lng1 = a[1], lat2 = b[0], lng2 = b[1];
    var toRadius = 0.017453292519943295;
    var R = 6371000; // radius in meter
    var dLat = (lat2 - lat1) * toRadius;
    var dLng = (lng2 - lng1) * toRadius;
    lat1 = lat1 * toRadius;
    lat2 = lat2 * toRadius;

    var a = Math.sin(dLat / 2) * Math.sin(dLat / 2) +
            Math.sin(dLng / 2) * Math.sin(dLng / 2) * Math.cos(lat1) * Math.cos(lat2);
    var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
    var d = R * c;

    return d;
}

// retrieve list of neighbors
function retrieve_neighbors(eps, point, cluster) {
    var neighbors = [];     // list of neighbor
    for (var iter = 0; iter < cluster.length; iter++) {
        var dist = sp_dist(point, cluster[iter]);
//        var dist2 = tp_dist(point, cluster[iter]);
        if (dist <= eps) {
            neighbors.push(iter);
        }
    }
    return neighbors;
}

// main function
var dbscan = function(X, eps, MinPts) {
    var cluster_label = 0; // label meaning: 0:unmarked; 1,2,3,...:cluster label; "noise":noise
    var labels = new Array(X.length).fill(0); // new an 0 array to store labels
    var clusters = []; // final output

    // clustering data points
    for (var i = 0; i < X.length; i++) {
        var neighbors = retrieve_neighbors(eps, X[i], X);

        if (neighbors.length < MinPts) {
            // if it is unmarked, mark it "noise"
            if (labels[i] === 0) {
                labels[i] = "noise";
            }
        } else {
            cluster_label += 1;  // construct a new cluster
            var cluster = [];   // construct cluster

            // mark label for all unmarked neighbors
            for (var j1 = 0; j1 < neighbors.length; j1++) {
                // if no other labels
                if (labels[neighbors[j1]] === 0 || labels[neighbors[j1]] === "noise") {
                    labels[neighbors[j1]] = cluster_label;
                    cluster.push(neighbors[j1]);
                }
            }

            // check the sub-circle of all objects
            while (neighbors.length !== 0) {
                var j2;
                j2 = neighbors.pop();
                var sub_neighbors = retrieve_neighbors(eps, X[j2], X);

                // mark all unmarked neighbors
                if (sub_neighbors.length >= MinPts) {
                    for (var k = 0; k < sub_neighbors.length; k++) {
                        // if no other labels 
                        if (labels[sub_neighbors[k]] === 0 || labels[sub_neighbors[k]] === "noise") {
                            neighbors.push(sub_neighbors[k]);
                            labels[sub_neighbors[k]] = cluster_label;
                            cluster.push(sub_neighbors[k]);
                        }
                    }
                }
            }

            // remove cluster of small size
            if (cluster.length < MinPts) {
                for (var j3 = 0; j3 < X.length; j3++) {
                    if (labels[j3] === cluster_label) {
                        labels[j3] = "noise";
                    }
                }
            } else {
                clusters.push(cluster);
            }
        }
    }

    //console.log(clusters);
    return clusters;
}

2、ST-DBSCAN

// test data
//var X = 
//     [
//      [2, 2, 1], [3, 3, 2], [4, 3, 1], [2, 3, 2],[1, 1, 1],
//      [32, 32, 31], [33, 34, 32], [34, 35, 34], [32, 31, 32],[31, 31, 31],
//      [100, 100, 10], [103, 102, 11], [100, 101, 9], [102, 102, 12], [101, 103, 10],
//      [200, 200, 10], [201, 202, 12], [201, 203, 14], [204, 201, 13], [203, 202, 12],
//      [300, 302, 51],
//      [400, 402, 101]
//     ];
//     


//var eps1 = 0.2;
//var eps2 = 5;
//var MinPts = 4;



// spatial distance 空间距离
function sp_dist(a, b) {
    return Math.sqrt(Math.pow((a[0] - b[0]), 2) + Math.pow((a[1] - b[1]), 2));
}

// temporal distance 时间距离
function tp_dist(a, b) {
    return Math.abs(a[2] - b[2]);
}

// retrieve list of neighbors 重复访问邻居
function retrieve_neighbors(eps1, eps2, point, cluster) {
    var neighbors = [];     // list of neighbor
    for (var iter=0; iter<cluster.length; iter++) {
        var dist1 = sp_dist(point, cluster[iter]);
        var dist2 = tp_dist(point, cluster[iter]);
        if (dist1 <= eps1 && dist2 <= eps2) {
            neighbors.push(iter);
        }
    }
    return neighbors;
}
st_dbscan(X, eps1, eps2, MinPts);
// main function 主方法
function st_dbscan(X, eps1, eps2, MinPts) {
	alert(X.length);
    var cluster_label = 0; // label meaning: 0:unmarked; 1,2,3,...:cluster label; "noise":noise //0:未标记;1,2,3 簇标签;noise:噪声
    var labels = new Array(X.length).fill(0); // new an 0 array to store labels 一个新列存放标签
    var clusters = []; // final output  最终输出

    // clustering data points
    for (var i=0; i<X.length; i++) {
        var neighbors = retrieve_neighbors(eps1, eps2, X[i], X);
        
        if (neighbors.length < MinPts) {
            // if it is unmarked, mark it "noise"
            if (labels[i] === 0) {
                labels[i] = "noise";
            }
        }else{
            cluster_label +=1;  // construct a new cluster
            var cluster = [];   // construct cluster

            // mark label for all unmarked neighbors
            for (var j1=0; j1<neighbors.length; j1++) {
                // if no other labels
                if (labels[neighbors[j1]] === 0 || labels[neighbors[j1]] === "noise") {  
                    labels[neighbors[j1]] = cluster_label;
                    cluster.push(neighbors[j1]);
                }
            }
            
            // check the sub-circle of all objects
            while (neighbors.length !== 0) {
                var j2;
                j2 = neighbors.pop();
                var sub_neighbors = retrieve_neighbors(eps1, eps2, X[j2], X);
                
                // mark all unmarked neighbors
                if (sub_neighbors.length >= MinPts) {
                    for (var k=0; k<sub_neighbors.length; k++) {
                        // if no other labels 
                        if (labels[sub_neighbors[k]] === 0 || labels[sub_neighbors[k]] === "noise") {
                            // might include |cluster_avg() - X[index]| < delta_eps
                            neighbors.push(sub_neighbors[k]);
                            labels[sub_neighbors[k]] = cluster_label;
                            cluster.push(sub_neighbors[k]);
                        }
                    }
                }
            }

            // remove cluster of small size
            if (cluster.length < MinPts) {
                for (var j3=0; j3<X.length; j3++) {
                    if (labels[j3] === cluster_label) {
                        labels[j3] = "noise";
                    }
                }
            }else{
                clusters.push(cluster);
            }
        }
    }
    alert(clusters);
    console.log(clusters);
    return clusters;
}

转载自:https://blog.csdn.net/weixin_37831477/article/details/78953043

You may also like...