0 导入库
import osimport pandas as pd
pd.set_option('display.max_rows',5)import osmnx as oximport geopandas as gpd
from shapely.geometry import Point
1 读取数据
假设我们有 如下的数据:
1.1 新加坡室外基站位置数据
cell_station=pd.read_csv('outdoor_LTE.csv')
cell_station
1.2 新加坡路网openstreetmap数据
G=ox.graph_from_place('Singapore,Singapore',network_type='drive')
ox.plot_graph(G)
1.2.1 从openstreetmap数据中提取路网数据
road_network=ox.utils_graph.graph_to_gdfs(G,nodes=False)
road_network
1.3 出行轨迹数据
traj=pd.read_csv('processed_dart_outdoor_3d.csv')
traj
其中latitude和longitude 是用户位置,带cell的是对应的基站位置,new_installation_id是用户id,timestamp_5s是时刻
1.3.1 出行轨迹转GeoDataFrame
points = [Point(xy) for xy in zip(traj.longitude, traj.latitude)]
points_gdf = gpd.GeoDataFrame(traj, geometry=points)
points_gdf
2 判断用户点在不在路网上
2.1 为每条道路创建非常小的缓冲区
在赤道附近,经纬度坐标系统中的一个度大约等于地球表面上的111公里,所以这里的buffer相当于1m
road_network_buffered = road_network.geometry.buffer(0.00001)
#将路网线几何对象缓冲一定距离(例如,1米),创建一个新的GeoDataFrame
road_network_buffered
'''
u v key
25451929 6749812859 0 POLYGON ((103.87103 1.29515, 103.87066 1.29508...
25455287 1637003462 0 POLYGON ((103.87412 1.29550, 103.87413 1.29550......
10732302222 259401350 0 POLYGON ((103.90657 1.30628, 103.90657 1.30628...
10806629050 2325064861 0 POLYGON ((103.90709 1.30698, 103.90709 1.30698...
Length: 45583, dtype: geometry
'''
缓冲区转化为geoDataFrame
road_network_buffered_gdf = gpd.GeoDataFrame(geometry=road_network_buffered)
road_network_buffered_gdf
2.2 判断每个点在不在路网的buffer上
points_in_road_network = gpd.sjoin(points_gdf,road_network_buffered_gdf, how="inner", op='within')
points_in_road_network
-
gpd.sjoin()
函数:执行空间连接操作。它将两个GeoDataFrame
基于空间关系合并。【基于点(points_gdf)是否在多边形(road_network_buffered_gdf
)内部】 -
how="inner"
:指定连接类型为内连接。这意味着结果中只会包含在points_gdf中的点,并且这些点必须位于road_network_buffered_gdf
内部。不在缓冲区内的点将被排除在外。 -
op='within'
:指定空间操作类型为“within”,即查找outdoor_traj
中哪些点位于road_network_buffered_gdf
的缓冲区多边形内部。
- 但是sjoin会存在一个问题:如果一个points_gdf中的点同时在两条路段的buffer中,结果中会分别出现这个点+一条路段buffer 的两个结果
- ——>一个时刻一个用户id,只保留一条即可
points_in_road_network_in_road=points_in_road_network.drop_duplicates(subset=['new_installation_id','timestamp_5s'])
points_in_road_network_in_road
3 不在路网的点和最近路段的距离
3.1 找到不在路网的用户点
traj_remain=traj.iloc[traj.index.difference(points_in_road_network_in_road.index)]
traj_remain
同样,生成对应的GeoDataFrame
geometry = [Point(xy) for xy in zip(traj_remain['longitude'], traj_remain['latitude'])]
traj_remain_gdf = gpd.GeoDataFrame(traj_remain, geometry=geometry)
3.2 将经纬度坐标转化为墨卡托坐标
转换成墨卡托坐标之后,两个点之间的距离单位就是米了
# 转换坐标系到UTM【横轴墨卡托】
utm_projection = "EPSG:32648"
# 新加坡对应的EPSG代码# 设置原始CRS为WGS 84 (EPSG:4326)
traj_remain_gdf.set_crs("EPSG:4326", inplace=True)
#这是GPS数据常用的坐标系统,其EPSG代码为4326road_network_utm = road_network.to_crs(utm_projection)
traj_remain_utm = traj_remain_gdf.to_crs(utm_projection)
3.3 获取距离
from shapely.ops import nearest_points
import pandas as pd# 创建一个空列表来存储距离
distances = []# 计算距离
for point in traj_remain_utm.geometry:#遍历每一个用户点nearest_geom_index = list(road_network_utm.sindex.nearest(point, 1))[1]nearest_geom = road_network_utm.geometry.iloc[nearest_geom_index]# 获取最近的路段(使用空间索引)distance = point.distance(nearest_geom)distances.append(distance.values[0])# 计算并存储距离traj_remain_utm['distance_to_nearest_road'] = distances
# 将距离列表添加到outdoor_traj_not_in_network_utm DataFrametraj_remain_utm['distance_to_nearest_road'].describe()
'''
count 330825.000000
mean 29.847753
std 65.107624
min 1.106306
25% 3.725888
50% 9.576145
75% 44.843000
max 4582.239106
Name: distance_to_nearest_road, dtype: float64
'''