2019-10-08 22:05:01 Python

Python

Copy Copied! Full
# Collect Hatena Bookmark entry lists for a handful of news sites and store
# each entry (title, URL, bookmark count) in the ``news`` table.
#
# Usage:
#   flask initdb   -- create the tables
#   flask crawl    -- crawl every site in ``list_url`` once
#   python <file>  -- crawl once, then start the development server
import os

import psycopg2.extras
import requests
from bs4 import BeautifulSoup
from flask import Flask
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql+mysqldb://localhost/book_news'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)
migrate = Migrate(app, db)


class News(db.Model):
    """One bookmarked article, identified by its unique URL."""

    __tablename__ = 'news'

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(120))
    url = db.Column(db.String(240), unique=True)
    bkm = db.Column(db.Integer)  # bookmark count reported by ``hb_count``

    def __repr__(self):
        return '<News %r>' % self.title


# Sites whose bookmarked entries are collected.
list_url = ['https://qiita.com/',
            'https://note.mu',
            'https://diamond.jp',
            'https://www.businessinsider.jp/']

# Endpoint queried with ``?url=<entry url>`` for an entry's bookmark count.
hb_count = 'http://api.b.st-hatena.com/entry.count'

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the crawl forever.
REQUEST_TIMEOUT = 10


def _bookmark_count(url):
    """Return the bookmark count for *url* as an int.

    The response body is parsed as an integer.  An empty or non-numeric
    body is counted as 0 instead of aborting the whole crawl.
    """
    response = requests.get(hb_count, params={'url': url},
                            timeout=REQUEST_TIMEOUT)
    try:
        return int(response.text)
    except ValueError:
        return 0


def _entry_links(page_html):
    """Yield ``(href, title)`` for every entry link found in *page_html*.

    Anchors without an ``href`` are skipped so no row is stored under a
    meaningless URL.
    """
    soup = BeautifulSoup(page_html, 'lxml')
    for h3 in soup.find_all('h3', class_='entrylist-contents-title'):
        for a in h3.find_all('a', class_='js-keyboard-openable'):
            href = a.get('href')
            if href:
                yield str(href), str(a.get('title'))


def _save_news(url, title, bkm):
    """Insert a ``News`` row for *url*, or update the existing one.

    ``url`` is declared unique, so blindly adding a second row for a known
    URL would fail at commit time; look the row up first instead.
    The caller is responsible for committing the session.
    """
    news = News.query.filter_by(url=url).first()
    if news is None:
        db.session.add(News(title=title, url=url, bkm=bkm))
    else:
        news.title = title
        news.bkm = bkm


def _collect_site(read_url):
    """Crawl the entry list of *read_url* page by page.

    Pagination starts at page 1 for every site and stops at the first page
    that contains no entries.
    """
    htb_url = "https://b.hatena.ne.jp/entrylist?url=" + read_url + "&count&page="
    page = 1
    while True:
        response = requests.get(htb_url + str(page), timeout=REQUEST_TIMEOUT)
        entries = list(_entry_links(response.text))
        if not entries:
            # An empty page is the termination signal for this site.
            print(str(read_url) + "の取得完了です")
            return
        for href, title in entries:
            _save_news(href, title, _bookmark_count(href))
        db.session.commit()
        page += 1
        print(str(page) + "回目継続です(" + str(read_url) + ")")


def collect_news():
    """Crawl every site in ``list_url``.

    Must be called inside an application context because it uses
    ``db.session``.
    """
    for read_url in list_url:
        _collect_site(read_url)


@app.cli.command('initdb')
def initdb_command():
    """Create all tables declared on ``db``."""
    db.create_all()


@app.cli.command('crawl')
def crawl_command():
    """Crawl every configured site once and store the results."""
    collect_news()


if __name__ == '__main__':
    # With debug=True the Werkzeug reloader re-executes this script in a
    # child process that has WERKZEUG_RUN_MAIN set to "true"; crawl only in
    # the parent so the crawl is not repeated on every (re)start.
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        with app.app_context():
            collect_news()
    app.run(debug=True)
RECOMMEND