千万别用sql查询经纬度数据了,有更快的方式
最近需要查询学校周边的零售户有哪些,几千个学校,几十万零售户,用sql查询费了十几分钟才查询出来,后来问的AI,使用python代码,十几秒就能搞定。以下是完整的代码,可以作为参考。

def aaaaa():
import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree
from time import time
# 1. 从 Doris 读取数据
from sqlalchemy import create_engine
engine = create_engine(
"mysql+pymysql://root:mysql.7334453@192.168.1.10:9030/hx_theme_data?charset=utf8mb4"
)
start_time = time()
merchants = pd.read_sql("""
SELECT retailer_uuid, label_ids, label_names, company_name,
label_list_fields, lic_no,
CAST(latitude AS DOUBLE) AS lat_d,
CAST(longitude AS DOUBLE) AS lng_d
FROM t_license
WHERE longitude IS NOT NULL AND longitude != 0
AND latitude IS NOT NULL AND latitude != 0
AND lic_status IN ('01','02','10')
""", engine)
schools = pd.read_sql("""
SELECT name, type,
CAST(lat AS DOUBLE) AS lat_d,
CAST(lng AS DOUBLE) AS lng_d
FROM t_data_school_info
WHERE lat IS NOT NULL AND lat != '' AND lat != '0'
AND lng IS NOT NULL AND lng != '' AND lng != '0'
""", engine)
# 2. 构建 BallTree(使用弧度,haversine 距离)
school_coords = np.radians(schools[['lat_d', 'lng_d']].values)
tree = BallTree(school_coords, metric='haversine')
# 3. 批量查询每个商家最近的学校,距离阈值 200m
merchant_coords = np.radians(merchants[['lat_d', 'lng_d']].values)
DISTANCE_M = 200
EARTH_RADIUS = 6371000
distances, indices = tree.query(merchant_coords, k=1) # 找最近1所
distances_m = distances[:, 0] * EARTH_RADIUS # 转换为米
# 4. 过滤距离 < 200m 的结果
mask = distances_m < DISTANCE_M
result = merchants[mask].copy()
result['distance'] = distances_m[mask]
result['school_name'] = schools.iloc[indices[mask, 0]]['name'].values
result['school_type'] = schools.iloc[indices[mask, 0]]['type'].values
result['school_lat'] = schools.iloc[indices[mask, 0]]['lat_d'].values
result['school_lng'] = schools.iloc[indices[mask, 0]]['lng_d'].values
# 5. 结果写回 Doris
# result.to_sql('t_merchant_nearest_school', engine, if_exists='replace')
print(f"耗时 {time() - start_time:.2f} 秒")
print(f"找到 {len(result)} 个200m内有学校的商家") 


