使用POI数据挖掘区域功能并在网页端显示
目录
使用POI数据挖掘区域功能并在网页端显示
最近在做一个创新项目,其中包含区域功能挖掘的部分。前期我们使用了路网数据对上海市进行了区域的划分,并为每个POI加上了所属的区域标签。之后便使用此数据进行区域功能挖掘部分的展示。
一。使用TF_IDF算法挖掘出每个区域对应的功能,此处我们分了六大功能,分别是住宅,工作,教育,商业,公共服务,景点
有如下几个子步骤
(1)根据poi的三级目录将poi数据划分到六种poi类别
(2)统计TF_IDF算法使用到的中间结果
(3)使用TF_IDF挖掘出每个区域的功能,并记录相关结果
代码如下
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/*
* poi.csv 经过预处理的原始poi数据,并且已经加上了区域编号,示例如下
* "241076","永生餐饮","121.603927","31.235058","餐饮服务;中餐厅;中餐厅","275"
*
* 将操作合并起来,包括向poi中添加类,保留统计中间结果到result.txt
* 和计算TF_IDF大小,并最终得到每个区域的功能,
*
* poiRange.csv 提取有用的信息(名称,经度,纬度,poi类别,所属区域)
* result.txt(各个poi类别的数量,每个区域含有的poi数量)
* TF_IDF1/2.txt 记录两种IDF算法产生的TF_IDF大小
* function1/2.txt 记录两种算法分别对应的每种功能区的区域数量
* 每个功能区包含的区域编号存储在poi_label1/2.txt文件中(分别对应两种算法)
*/
/**
 * Assigns a dominant function (one of six POI classes) to every region using TF-IDF.
 *
 * <p>{@link #main} runs two stages:
 * <ol>
 *   <li>Classify every POI of {@code poi.csv} with the tables read from {@code class5.txt},
 *       write {@code poiRange.csv} (name, lng, lat, class, region) and the per-class /
 *       per-region totals to {@code result.txt}.</li>
 *   <li>Compute two TF-IDF variants per region and class, writing the scores to
 *       {@code TF_IDF1/2.txt}, the number of regions per function to {@code function1/2.txt}
 *       and the region ids per function to {@code poi_label1/2.txt}.</li>
 * </ol>
 */
public class POI_Func {
    /** Row {@code i} holds the first-level category names of POI class {@code i}; unused slots are null. */
    public static String[][] CC;
    /** Each row: element 0 is a POI class index as text, the rest are "first;second" category keys. */
    public static String[][] CC2;
    /** Number of successful {@link #getSecondPoi} lookups so far. */
    public static int ccc = 0;

    private static final String ENCODING = "utf-8";
    /** Number of POI classes (functions). */
    private static final int CLASS_COUNT = 6;
    /** Number of second-level rows that follow the class rows in class5.txt. */
    private static final int SECOND_LEVEL_ROWS = 3;
    /** Minimum row widths; rows grow beyond this when a line of class5.txt is longer. */
    private static final int MIN_CLASS_ROW_WIDTH = 19;
    private static final int MIN_SECOND_ROW_WIDTH = 11;
    /** Number of regions; region ids index arrays of this size. */
    private static final int REGION_COUNT = 542;
    private static final String RULE = "----------------------------------------------";

    /**
     * Looks up the POI class of a first-level category name.
     *
     * @param type first-level category name
     * @return index of the first row of {@link #CC} containing {@code type}, or -1 if none does
     */
    public static int getPoiClass(String type) {
        for (int i = 0; i < CC.length; i++) {
            for (int j = 0; j < CC[i].length; j++) {
                if (CC[i][j] == null) {
                    break; // the rest of this row is unused padding
                }
                if (CC[i][j].equals(type)) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Looks up the POI class of a "first;second" category key and counts the hit in {@link #ccc}.
     *
     * @param type key of the form {@code first;second}
     * @return the class index stored in element 0 of the first matching row of {@link #CC2},
     *         or -1 if no row contains {@code type}
     */
    public static int getSecondPoi(String type) {
        for (int i = 0; i < CC2.length; i++) {
            // Element 0 is the class index, so the keys start at 1.
            for (int j = 1; j < CC2[i].length; j++) {
                if (CC2[i][j] == null) {
                    break;
                }
                if (CC2[i][j].equals(type)) {
                    ccc++;
                    return Integer.parseInt(CC2[i][0]);
                }
            }
        }
        return -1;
    }

    /**
     * Runs both stages on the files in the working directory.
     *
     * @param args unused
     * @throws IOException if an input cannot be read, an output cannot be written,
     *                     or class5.txt has fewer lines than required
     */
    public static void main(String[] args) throws IOException {
        int[] countP = new int[CLASS_COUNT];   // POIs per class
        int[] countR = new int[REGION_COUNT];  // classified POIs per region

        // Stage 1: classify the POIs and record the totals.
        loadClassTables("class5.txt");
        int unclassified = classifyPois("poi.csv", "poiRange.csv", countP, countR);
        CC = null;

        System.out.println("在区域外的poi数据:" + unclassified);
        System.out.println("通过第二级目录得到的数据:" + ccc);
        System.out.println(RULE);
        writeCounts("result.txt", countP, countR);
        System.out.println(RULE);

        // Stage 2: TF-IDF per region and class.
        int[][] tf = new int[REGION_COUNT][CLASS_COUNT];
        int countPoi = readClassCounts("poiRange.csv", tf);

        float[][] tfF = new float[REGION_COUNT][CLASS_COUNT];   // class share within the region
        int[] regionsWithClass = new int[CLASS_COUNT];          // regions containing the class
        for (int i = 0; i < REGION_COUNT; i++) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                tfF[i][j] = (countR[i] == 0) ? 0f : tf[i][j] * 1.0f / countR[i];
                if (tf[i][j] != 0) {
                    regionsWithClass[j]++;
                }
            }
        }

        // Variant 1: plain IDF, log(R / (F + 1)), clamped at zero.
        float[][] idf = new float[REGION_COUNT][CLASS_COUNT];
        for (int i = 0; i < REGION_COUNT; i++) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                idf[i][j] = clampNegative(
                        (float) Math.log(REGION_COUNT * 1.0 / (regionsWithClass[j] + 1)));
            }
        }
        labelRegions(tfF, idf, "TF_IDF1.txt", "function1.txt", "poi_label1.txt",
                "各个功能区含有的区域数(第一种):");

        // Variant 2: the IDF is additionally scaled by 100 / log(countP / TF).
        for (int i = 0; i < REGION_COUNT; i++) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                if (tf[i][j] == 0) {
                    // Dividing by a zero count gives 0 when countP > 0 but NaN when the class
                    // has no POIs at all (0/0); pin both cases to 0.
                    idf[i][j] = 0f;
                } else {
                    // NOTE(review): when a single region holds every POI of a class the
                    // inner log is 0 and this evaluates to Infinity, as it always did.
                    idf[i][j] = clampNegative((float) (100f
                            / Math.log(countP[j] * 1.0 / tf[i][j])
                            * Math.log(REGION_COUNT * 1.0 / (regionsWithClass[j] + 1))));
                }
            }
        }
        labelRegions(tfF, idf, "TF_IDF2.txt", "function2.txt", "poi_label2.txt",
                "各个功能区含有的区域数(第二种):");

        System.out.println(RULE);
        System.out.println("使用到poi数:" + countPoi);
    }

    /** Reads the class rows and the second-level rows of class5.txt into CC and CC2. */
    private static void loadClassTables(String path) throws IOException {
        String[][] classes = new String[CLASS_COUNT][];
        String[][] secondLevel = new String[SECOND_LEVEL_ROWS][];
        try (BufferedReader in = openReader(path)) {
            for (int i = 0; i < CLASS_COUNT; i++) {
                String[] tokens = requireLine(in, path).split(" ");
                // Token 0 of a class row is skipped; only the category names are stored.
                int names = tokens.length - 1;
                classes[i] = new String[Math.max(MIN_CLASS_ROW_WIDTH, names)];
                System.arraycopy(tokens, 1, classes[i], 0, names);
            }
            for (int i = 0; i < SECOND_LEVEL_ROWS; i++) {
                String[] tokens = requireLine(in, path).split(" ");
                secondLevel[i] = new String[Math.max(MIN_SECOND_ROW_WIDTH, tokens.length)];
                System.arraycopy(tokens, 0, secondLevel[i], 0, tokens.length);
            }
        }
        CC = classes;
        CC2 = secondLevel;
    }

    /** Returns the next line or fails with a message instead of a NullPointerException. */
    private static String requireLine(BufferedReader in, String path) throws IOException {
        String line = in.readLine();
        if (line == null) {
            throw new IOException(path + " has fewer than "
                    + (CLASS_COUNT + SECOND_LEVEL_ROWS) + " lines");
        }
        return line;
    }

    /**
     * Classifies every POI line, writes "name,lng,lat,class,region" per POI and accumulates
     * the per-class and per-region totals. Returns the number of POIs left without a class.
     */
    private static int classifyPois(String poiPath, String outPath, int[] countP, int[] countR)
            throws IOException {
        int unclassified = 0;
        try (BufferedReader in = openReader(poiPath);
             BufferedWriter out = openWriter(outPath)) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank lines, e.g. at the end of the file
                }
                // Fields are quoted: "id","name","lng","lat","a;b;c","region".
                String[] fields = line.split(",");
                String[] levels = fields[4].split(";");
                int range = Integer.parseInt(fields[5].substring(1, fields[5].length() - 1));
                String firstLevel = levels[0].substring(1); // drop the opening quote
                int poiC = getPoiClass(firstLevel);
                if (poiC == -1) {
                    String category = fields[4].substring(1, fields[4].length() - 1);
                    // A second-level lookup needs a second level to exist.
                    if (!category.equals("NULL") && levels.length > 1) {
                        poiC = getSecondPoi(firstLevel + ";" + levels[1]);
                    }
                    if (poiC == -1) {
                        unclassified++;
                    }
                }
                if (range != -1 && poiC != -1) {
                    countR[range]++;
                    countP[poiC]++;
                }
                out.write(fields[1] + "," + fields[2] + "," + fields[3] + "," + poiC + "," + range);
                out.newLine();
            }
        }
        return unclassified;
    }

    /** Prints both total arrays and writes them as two space-separated lines. */
    private static void writeCounts(String path, int[] countP, int[] countR) throws IOException {
        StringBuilder poiLine = new StringBuilder();
        System.out.print("poi:");
        for (int i = 0; i < countP.length; i++) {
            System.out.print(countP[i] + " ");
            poiLine.append(countP[i]).append(' ');
        }
        System.out.println();

        StringBuilder rangeLine = new StringBuilder();
        System.out.print("Range:");
        for (int i = 0; i < countR.length; i++) {
            System.out.print(countR[i] + ",");
            rangeLine.append(countR[i]).append(' ');
        }
        System.out.println();

        try (BufferedWriter out = openWriter(path)) {
            out.write(poiLine.toString().trim());
            out.newLine();
            out.write(rangeLine.toString().trim());
        }
    }

    /** Fills tf[region][class] from poiRange.csv and returns the number of POIs counted. */
    private static int readClassCounts(String path, int[][] tf) throws IOException {
        int used = 0;
        try (BufferedReader in = openReader(path)) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] fields = line.split(",");
                int region = Integer.parseInt(fields[4]);
                int poiClass = Integer.parseInt(fields[3]);
                if (region != -1 && poiClass != -1) {
                    tf[region][poiClass]++;
                    used++;
                }
            }
        }
        return used;
    }

    /**
     * Multiplies tfF by idf, picks the highest-scoring class per region and writes the score
     * matrix, the number of regions per function and the region ids per function.
     */
    private static void labelRegions(float[][] tfF, float[][] idf, String scorePath,
            String functionPath, String labelPath, String header) throws IOException {
        int[] numFunc = new int[CLASS_COUNT];
        StringBuilder[] labels = new StringBuilder[CLASS_COUNT];
        for (int j = 0; j < CLASS_COUNT; j++) {
            labels[j] = new StringBuilder();
        }
        int fallback = 0; // round-robin target for regions whose scores are all zero

        try (BufferedWriter scores = openWriter(scorePath)) {
            for (int i = 0; i < tfF.length; i++) {
                StringBuilder row = new StringBuilder();
                int best = 0;
                double max = 0;
                for (int j = 0; j < CLASS_COUNT; j++) {
                    float score = tfF[i][j] * idf[i][j];
                    if (score > max) { // strict: the first class wins a tie
                        best = j;
                        max = score;
                    }
                    row.append(score).append(' ');
                }
                if (max == 0) {
                    // No class stands out: count the region for the next function in turn,
                    // but do not list it in the label file.
                    fallback %= CLASS_COUNT;
                    numFunc[fallback++]++;
                } else {
                    numFunc[best]++;
                    labels[best].append(i).append(' ');
                }
                scores.write(row.toString().trim());
                scores.newLine();
            }
        }

        System.out.print(header);
        try (BufferedWriter functions = openWriter(functionPath)) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                System.out.print(numFunc[j] + " ");
                functions.write(numFunc[j] + "");
                functions.newLine();
            }
        }
        System.out.println();

        try (BufferedWriter out = openWriter(labelPath)) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                if (j > 0) {
                    out.newLine(); // separator only: no newline after the last function
                }
                out.write(labels[j].toString().trim());
            }
        }
    }

    /** Replaces negative values by 0; any other value is returned unchanged. */
    private static float clampNegative(float value) {
        return value < 0 ? 0 : value;
    }

    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), ENCODING));
    }

    private static BufferedWriter openWriter(String path) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), ENCODING));
    }
}
二。将每个poi的有用信息提取出来,只包含经纬度和所属功能区编号(0-5)
代码如下
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/*
* 此程序以poiRange.csv和poi_label1.txt为输入,将结果保存在poiF1.json、poiF2.json中
*
* poiF1: 包含class5的六个不同的功能区的poi数据(每个区域只含有对应功能区的poi),包含经纬度和所属功能编号
* poiF2: 包含class5的六个不同的功能区的poi数据(每个区域含有所有的poi,只不过poi功能号码相同),包含经纬度和所属功能编号
*/
/**
 * Converts {@code poiRange.csv} into two JSON-like point lists, using the region-to-function
 * assignment stored in {@code poi_label1.txt}.
 *
 * <p>{@code poiF1.json} receives only the POIs whose own class equals the function of their
 * region; {@code poiF2.json} receives every classified POI that lies in a region. Both files
 * consist of one {@code {"lng":..,"lat":..,"fun":..},} entry per line followed by a closing
 * {@code ]}; no opening {@code [} is written and the last entry keeps its comma, so the
 * files need that small manual fix before they parse as JSON.
 */
public class Func_Json {
    /** Row {@code i} lists the region ids assigned to function {@code i}; unused slots are -1. */
    public static int CC[][];

    private static final String ENCODING = "utf-8";
    /** Number of functions, i.e. of lines expected in the label file. */
    private static final int FUNCTION_COUNT = 6;
    /** Minimum row width; rows grow when a function owns more regions than this. */
    private static final int MIN_ROW_WIDTH = 260;

    /**
     * Finds the function a region belongs to.
     *
     * @param label region id
     * @return index of the first row of {@link #CC} that contains {@code label} before its
     *         first -1, or -1 if there is none
     */
    public static int getFunction(int label) {
        for (int i = 0; i < CC.length; i++) {
            for (int j = 0; j < CC[i].length; j++) {
                if (CC[i][j] == -1) {
                    break; // end of the used part of this row
                }
                if (CC[i][j] == label) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Reads the inputs from the working directory and writes both output files.
     *
     * @param args unused
     * @throws IOException if an input cannot be read or an output cannot be written
     */
    public static void main(String[] args) throws IOException {
        CC = readLabelTable("poi_label1.txt");

        try (BufferedReader in = openReader("poiRange.csv");
             BufferedWriter matching = openWriter("poiF1.json");
             BufferedWriter all = openWriter("poiF2.json")) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank lines, e.g. at the end of the file
                }
                // Columns: name, lng, lat, POI class, region id.
                String[] fields = line.split(",");
                int poi = Integer.parseInt(fields[3]);
                int label = Integer.parseInt(fields[4]);
                if (poi == -1 || label == -1) {
                    continue; // unclassified POI or POI outside every region
                }
                int fun = getFunction(label);
                String entry = "{\"lng\":" + stripQuotes(fields[1])
                        + ",\"lat\":" + stripQuotes(fields[2])
                        + ",\"fun\":" + fun + "},";
                if (fun == poi) {
                    matching.write(entry);
                    matching.newLine();
                }
                all.write(entry);
                all.newLine();
            }
            matching.write("]");
            all.write("]");
        }
        System.out.println("Tranform end");
    }

    /**
     * Reads one row of region ids per function. A missing or empty line means the function
     * has no regions: the label file holds an empty line for such a function, and no sixth
     * line at all when the last function is the empty one.
     */
    private static int[][] readLabelTable(String path) throws IOException {
        int[][] table = new int[FUNCTION_COUNT][];
        try (BufferedReader in = openReader(path)) {
            for (int i = 0; i < FUNCTION_COUNT; i++) {
                table[i] = parseRow(in.readLine());
            }
        }
        return table;
    }

    /** Parses space-separated region ids into a row padded with -1. */
    private static int[] parseRow(String line) {
        String[] tokens = (line == null) ? new String[0] : line.trim().split(" ");
        int[] row = new int[Math.max(MIN_ROW_WIDTH, tokens.length)];
        for (int k = 0; k < row.length; k++) {
            row[k] = -1;
        }
        int used = 0;
        for (int k = 0; k < tokens.length; k++) {
            if (!tokens[k].isEmpty()) { // "".split(" ") yields one empty token
                row[used++] = Integer.parseInt(tokens[k]);
            }
        }
        return row;
    }

    /** Drops the first and last character (the surrounding quotes of a CSV field). */
    private static String stripQuotes(String field) {
        return field.substring(1, field.length() - 1);
    }

    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), ENCODING));
    }

    private static BufferedWriter openWriter(String path) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), ENCODING));
    }
}
三。在网页中展示效果
利用上一步得到的json文件(需要稍微处理一下:在文件开头补上“[”,并把最后一项{}之后的“,”去掉),再使用leaflet.js和d3.js库,便能清晰地在地图上展示每个区域的功能区分布。
效果如下:
四。代码链接
(1)
java代码和源文件
java代码和源文件
(2)网页端显示地址
转载自:https://blog.csdn.net/lccla120712/article/details/71250628