Python 获取景点代码

作者: haiyang 分类: Python 发布时间: 2019-07-31 17:12 浏览:239

本文是用 Python 3 编写的一个案例:通过 Selenium 抓取去哪儿网(piao.qunar.com)的景点数据。

 from tqdm import tqdm
 import time
 from selenium import webdriver
 from selenium.common.exceptions import TimeoutException, WebDriverException
 import pandas as pd
 import numpy as np
 import os
# Region names that can be used as the search keyword.
# NOTE(review): this list omits some provinces (e.g. 江苏, 浙江, 安徽, 黑龙江)
# -- confirm whether that is intentional.
all_positions = [
    "北京", "天津", "上海", "重庆",
    "河北", "山西", "辽宁", "吉林",
    "福建", "江西", "山东", "河南",
    "湖北", "湖南", "广东", "海南",
    "四川", "贵州", "云南", "陕西",
    "甘肃", "青海", "台湾", "内蒙古",
    "广西", "西藏", "宁夏", "新疆",
    "香港", "澳门",
]

# Keywords that are actually crawled. Only 北京 is enabled here; assign
# `all_positions` instead to crawl every region listed above.
position = ['北京']

# Parallel column accumulators: entry i of each list describes the same sight.
# They are appended to by get_one_page() and later turned into a DataFrame.
name, level, hot, address, num = [], [], [], [], []
 def get_one_page(key, page, retries=3):
     """Scrape one Qunar ticket-search result page into the module-level columns.

     A headless Chrome session is opened for the request, every element with
     class ``sight_item`` is read, and one entry per sight is appended to the
     module-level lists ``name``, ``level``, ``hot``, ``address`` and ``num``.

     Args:
         key: Search keyword placed in the ``keyword`` query parameter.
         page: Result page number placed in the ``page`` query parameter.
         retries: Maximum number of attempts before the page is skipped.
             Values below 1 are treated as 1.

     Returns:
         None. Results are delivered through the module-level lists.

     A page that still fails after ``retries`` attempts is reported with
     ``print`` and skipped, so the remaining pages can still be crawled.
     """
     # Function-scope imports keep this block self-contained; the file-level
     # imports do not provide these names.
     from urllib.parse import urlencode

     from selenium.common.exceptions import NoSuchElementException
     from selenium.webdriver.common.by import By

     def text_or(item, class_name, default):
         """Return the text of the child with *class_name*, or *default* if absent."""
         try:
             return item.find_element(By.CLASS_NAME, class_name).text
         except NoSuchElementException:
             return default

     # urlencode escapes the keyword, so non-ASCII text or characters such as
     # '&' cannot corrupt the query string.
     query = urlencode({
         "keyword": key,
         "region": "",
         "from": "mpl_search_suggest",
         "page": page,
     })
     url = "http://piao.qunar.com/ticket/list.htm?" + query

     attempts = max(1, retries)
     for attempt in range(1, attempts + 1):
         driver = None
         try:
             option_chrome = webdriver.ChromeOptions()
             option_chrome.add_argument('--headless')
             driver = webdriver.Chrome(options=option_chrome)
             time.sleep(5)
             driver.get(url)

             # Collect complete rows locally first: if any lookup fails halfway
             # through the page, nothing has been written to the shared lists,
             # so the five columns can never end up with different lengths.
             rows = []
             for item in driver.find_elements(By.CLASS_NAME, "sight_item"):
                 rows.append((
                     # Sight name.
                     item.find_element(By.CLASS_NAME, "name").text,
                     # Sight rating; not every sight has one.
                     text_or(item, "level", ""),
                     # Popularity; the first three characters are dropped
                     # (presumably a fixed label prefix -- verify on the page).
                     item.find_element(By.CLASS_NAME, "product_star_level").text[3:],
                     # Sight address.
                     item.find_element(By.CLASS_NAME, "area").text,
                     # Sales count; 0 when the element is missing.
                     text_or(item, "hot_num", 0),
                 ))
         except WebDriverException as exc:
             # TimeoutException is a subclass of WebDriverException, so this one
             # clause covers both kinds of failure.
             print("get_one_page({!r}, {!r}) attempt {}/{} failed: {}".format(
                 key, page, attempt, attempts, exc))
             continue
         finally:
             # Always release the browser, on success and on failure alike.
             if driver is not None:
                 driver.quit()

         for sight_name, sight_level, sight_hot, sight_address, sight_num in rows:
             name.append(sight_name)
             level.append(sight_level)
             hot.append(sight_hot)
             address.append(sight_address)
             num.append(sight_num)
         return

     print("get_one_page({!r}, {!r}) skipped after {} failed attempts".format(
         key, page, attempts))
# Number of result pages requested per keyword (pages 1 through 13).
max_page = 13

# Crawl every configured keyword page by page; get_one_page() appends its
# results to the module-level lists name/level/hot/address/num.
for key in tqdm(position):
    print("正在爬取{}".format(key))
    for page in range(1, max_page + 1):
        print("正在爬取第{}页".format(page))
        get_one_page(key, page)

# Assemble the accumulated columns into one table; `columns` pins the column
# order of the CSV.
sight = pd.DataFrame(
    {'name': name, 'level': level, 'hot': hot, 'address': address, 'num': num},
    columns=['name', 'level', 'hot', 'address', 'num'],
)
# utf_8_sig prepends a UTF-8 BOM (presumably so spreadsheet applications
# detect the encoding of the Chinese text).
sight.to_csv("sight.csv", encoding="utf_8_sig")

运行这段代码需要先下载 chromedriver.exe,并将其放到系统 PATH 中。下载地址是:
http://chromedriver.chromium.org/downloads

本案例获取的数据会以 .csv 格式保存到本地(文件名为 sight.csv)。

如果觉得我的文章对您有用,请随意赞赏。您的支持将鼓励我继续创作!

发表评论

电子邮件地址不会被公开。 必填项已用*标注