需求:
根据公司名称 查询公司地址
有3000个公司名称的xlsx
计划方案:
方案1:用高德地图的API 获取,个人开发着免费5000条,成本为0
方案2:用企查查 这类API,官网报价0.15元 /次 查3000条 成本:500元
预期收益:
朋友介绍的需求:
88元红包
方案选择:
综上,选择方案1
实现:
高德地图API:https://lbs.amap.com/api/webservice/guide/api-advanced/search
高德地图API key的获得:https://lbs.amap.com/api/webservice/create-project-and-key
页面可以测试:

query_address.py
import requestsdef get_location(keywords: str):"""使用 高德地图 API 根据地址查询位置信息。"""url = "https://restapi.amap.com/v3/place/text?parameters"params = {"key": "填入你的key",   #####"keywords": keywords,"types": "公司企业","children": 1,"offset": 10,"extensions":"base"}res_data = requests.get(url, params=params, timeout=10)# 转换为<class 'dict'>res_dict = res_data.json()# print(res_dict)base_data = res_dict["pois"][0]# 拿到基础数据pname = base_data.get("pname")cityname = base_data["cityname"]address = base_data["address"]return pname,cityname,address# 示例使用
if __name__ == "__main__":keywords = "安徽安科"  # 替换成你想查询的名称pname,cityname, address = get_location(keywords)print(pname,cityname,address)
main.py
import pandas as pd
from query_address import get_location
import timedef read_xlsx():df = pd.read_excel('company.xlsx')return dfdef main():df = read_xlsx()first_column = df.iloc[:, 0]# 确保有足够的列并设置列名if df.shape[1] < 4:# 添加缺失的列for col_idx in range(df.shape[1], 4):if col_idx == 1:df['pname'] = Noneelif col_idx == 2:df['cityname'] = Noneelif col_idx == 3:df['address'] = Noneelse:df[f'col_{col_idx}'] = None# 统计信息valid_rows = first_column.notna().sum()total_rows = len(first_column)print(f"总行数: {total_rows}, 需要处理: {valid_rows} 行")processed = 0success = 0failed = 0for i, value in enumerate(first_column):if pd.notna(value):processed += 1print(f"\n[{processed}/{valid_rows}] 第{i + 1}行: {value}")try:pname, cityname, address = get_location(value)print(f"✓ 成功: {pname} | {cityname} | {address}")# 存储结果df.iloc[i, 1] = pnamedf.iloc[i, 2] = citynamedf.iloc[i, 3] = addresssuccess += 1# 定期保存进度if processed % 10 == 0:  # 每10条保存一次backup_file = f'test_progress_{processed}.xlsx'df.to_excel(backup_file, index=False)print(f"🔄 进度已保存到 {backup_file}")# API调用间隔,避免频率过高time.sleep(0.1)  # 根据API限制调整except Exception as e:print(f"✗ 失败: {e}")df.iloc[i, 1] = "查询失败"df.iloc[i, 2] = ""df.iloc[i, 3] = ""failed += 1# 最终保存final_file = 'test_final_result.xlsx'df.to_excel(final_file, index=False)print(f"\n=== 处理完成 ===")print(f"总计处理: {processed} 行")print(f"成功: {success} 行")print(f"失败: {failed} 行")print(f"结果已保存到: {final_file}")return dfif __name__ == '__main__':result_df = main()# 显示处理结果摘要print("\n=== 结果预览 ===")print(result_df.head(10))# 统计成功率success_count = (result_df.iloc[:, 1] != "查询失败").sum()total_processed = result_df.iloc[:, 1].notna().sum()if total_processed > 0:success_rate = (success_count / total_processed) * 100print(f"\n成功率: {success_rate:.1f}% ({success_count}/{total_processed})")
运行main:

总体目录:

结果:


