import scrapy

from ocean_weather.items import WeatherItem


class WeatherSpider(scrapy.Spider):
    """Spider that scrapes a single weather reading from the start page."""

    name = "weather"
    start_urls = [
        'http://example.com/weather',  # start URL to crawl
    ]

    def parse(self, response):
        """Extract the weather fields from ``response`` and yield one item.

        Each field is taken from the first text node matched by its XPath
        expression. The values are stored exactly as extracted (no
        stripping or numeric conversion is done here).
        """
        # Pull the raw values out of the page with XPath selectors.
        date = response.xpath('//div[@class="date"]/text()').get()
        temperature = response.xpath('//div[@class="temperature"]/text()').get()
        wind_speed = response.xpath('//div[@class="wind-speed"]/text()').get()
        humidity = response.xpath('//div[@class="humidity"]/text()').get()

        # Build the item and store the extracted data in it.
        item = WeatherItem()
        item['date'] = date
        item['temperature'] = temperature
        item['wind_speed'] = wind_speed
        item['humidity'] = humidity

        yield item
-- Create the weather data table.
-- Rows are stored as comma-delimited text, so the source file's fields must
-- appear in the column order declared below.
-- `date` is quoted with backticks because DATE is a reserved keyword in Hive.
CREATE TABLE weather_data (
    `date` STRING,
    temperature INT,
    humidity INT,
    wind_speed INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- Load the weather data from HDFS into the table.
LOAD DATA INPATH 'hdfs://path/to/weather/data.csv' INTO TABLE weather_data;

-- Query the highest temperature and the lowest humidity.
SELECT
    MAX(temperature) AS max_temperature,
    MIN(humidity) AS min_humidity
FROM weather_data;

-- Query the average wind speed for each month.
SELECT
    MONTH(`date`) AS month,
    AVG(wind_speed) AS avg_wind_speed
FROM weather_data
GROUP BY MONTH(`date`);
项目 | 系统环境及版本 |
硬件环境 | Windows 64 位操作系统 |
Python版本 | 3.5 |
数据库 | MySQL |
开发工具 | Pycharm |
项目架构 | Hive+Flask+Scrapy |
from flask import Flask

app = Flask(__name__)


# Configuration object
class Config:
    """Settings loaded into ``app.config`` via ``from_object``."""

    DEBUG = False
    # The two values below are placeholders as written; presumably they are
    # meant to be replaced with real values before deployment.
    SECRET_KEY = 'your_secret_key'
    DATABASE_URI = 'your_database_uri'


# Load the configuration
app.config.from_object(Config)


# Routes and view functions
@app.route('/')
def index():
    """Return the greeting served at the site root."""
    return 'Hello, World!'


if __name__ == '__main__':
    app.run()
# NOTE(review): ``conn`` is not defined in this snippet; it is assumed to be
# an already-open DB-API connection created elsewhere. The ``?`` placeholders
# and AUTOINCREMENT suggest sqlite3 -- confirm against the full file.

# Create the data table (named ``jobs``) if it does not exist yet.
cursor = conn.cursor()
cursor.execute('''
    CREATE TABLE IF NOT EXISTS jobs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT,
        company TEXT,
        location TEXT,
        salary INTEGER
    )
''')
conn.commit()


# Insert a record
def insert_job(title, company, location, salary):
    """Insert one row into ``jobs`` and commit."""
    cursor.execute(
        'INSERT INTO jobs (title, company, location, salary) VALUES (?, ?, ?, ?)',
        (title, company, location, salary),
    )
    conn.commit()


# Query records
def query_jobs():
    """Return every row of ``jobs`` as fetched by ``cursor.fetchall()``."""
    cursor.execute('SELECT * FROM jobs')
    return cursor.fetchall()


# Update a record
def update_job(id, title, company, location, salary):
    """Overwrite all data columns of the row whose ``id`` matches, then commit.

    The parameter name ``id`` shadows the builtin; it is kept unchanged so
    existing keyword-argument callers keep working.
    """
    cursor.execute(
        'UPDATE jobs SET title=?, company=?, location=?, salary=? WHERE id=?',
        (title, company, location, salary, id),
    )
    conn.commit()


# Delete a record
def delete_job(id):
    """Delete the row whose ``id`` matches, then commit."""
    cursor.execute('DELETE FROM jobs WHERE id=?', (id,))
    conn.commit()


# Close the database connection
def close_connection():
    """Close the shared cursor first, then the connection."""
    cursor.close()
    conn.close()
欢迎光临 ToB企服应用市场:ToB评测及商务社交产业平台 (https://dis.qidao123.com/) | Powered by Discuz! X3.4 |