手把手教你使用Flask搭建ES搜索引擎(實戰篇)
現在正式進入主題:開始使用 Flask 搭建 ES 搜索。
1 配置文件
Config.py
# coding:utf-8
import os
from urllib.parse import quote_plus

DB_USERNAME = 'root'
DB_PASSWORD = None  # leave as None when the account has no password
DB_HOST = '127.0.0.1'
DB_PORT = '3306'
DB_NAME = 'flask_es'


def _build_database_uri(username, password, host, port, name):
    """Return the ``mysql+pymysql`` SQLAlchemy URL for the given settings.

    The password part is omitted entirely when *password* is empty or None,
    so the literal text "None" never ends up in the URL. User name and
    password are percent-encoded so special characters cannot break the URL.
    """
    credentials = quote_plus(username)
    if password:
        credentials += ':' + quote_plus(password)
    return 'mysql+pymysql://%s@%s:%s/%s' % (credentials, host, port, name)


class Config:
    """Flask settings object loaded with ``app.config.from_object``."""

    SECRET_KEY = "隨機(jī)字符"  # placeholder: replace with a random secret
    SQLALCHEMY_COMMIT_ON_TEARDOWN = True  # commit automatically on teardown
    SQLALCHEMY_TRACK_MODIFICATIONS = True  # Flask-SQLAlchemy modification tracking
    DEBUG = True  # debug mode
    # Database URL
    SQLALCHEMY_DATABASE_URI = _build_database_uri(
        DB_USERNAME, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME)

    MAIL_SERVER = 'smtp.qq.com'
    # Flask-Mail reads MAIL_PORT; the original key was misspelled MAIL_POST,
    # so the port was silently ignored.
    MAIL_PORT = 465
    MAIL_POST = MAIL_PORT  # misspelled alias kept for any code that reads it
    MAIL_USERNAME = '3417947630@qq.com'
    MAIL_PASSWORD = '郵箱授權(quán)碼'  # placeholder: mailbox authorization code
    FLASK_MAIL_SUBJECT_PREFIX = 'M_KEPLER'
    FLASK_MAIL_SENDER = MAIL_USERNAME  # default sender
    # Port 465 is the implicit-SSL SMTP port, so SSL must be on and STARTTLS off.
    MAIL_USE_SSL = True
    MAIL_USE_TLS = False
這是一份相對簡單的 Flask Config 文件,當然對于當前項目來說數據庫的連接不是必要的,我只是用 MySQL 來作為輔助用,小伙伴們沒有必要配置連接數據庫,有 ES 足以。然后郵箱通知這個看個人需求 .....
2 日志
Logger.py
日志模塊在工程應用中是必不可少的一環,根據不同的生產環境來輸出日志文件是非常有必要的。用句江湖上的話來說: "如果沒有日志文件,你死都不知道怎么死的 ....."
# coding=utf-8
"""Project-wide logging setup.

Exposes ``log_v``: a logger that prints colored output to the console (via
coloredlogs) and writes INFO-and-above records to ``<project>/logs/<date>.log``.
"""
import os
import logging
import logging.config as log_conf
import datetime

import coloredlogs

coloredlogs.DEFAULT_FIELD_STYLES = {'asctime': {'color': 'green'}, 'hostname': {'color': 'magenta'}, 'levelname': {'color': 'magenta', 'bold': False}, 'name': {'color': 'green'}}

# Resolve against the absolute path of this file: with a relative __file__
# the nested dirname() calls return '' and the old '+ "/logs"' concatenation
# pointed at the filesystem root.
log_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'logs')
# exist_ok avoids the check-then-create race when several workers start at once.
os.makedirs(log_dir, exist_ok=True)

today = datetime.datetime.now().strftime("%Y-%m-%d")
log_path = os.path.join(log_dir, today + ".log")

log_config = {
    'version': 1,
    # Output formats
    'formatters': {
        'colored_console': {
            'format': "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            'datefmt': '%H:%M:%S'
        },
        'detail': {
            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            'datefmt': "%Y-%m-%d %H:%M:%S"  # timestamp format
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'level': 'DEBUG',
            'formatter': 'colored_console'
        },
        'file': {
            'class': 'logging.handlers.RotatingFileHandler',
            'maxBytes': 1024 * 1024 * 1024,
            'backupCount': 1,
            'filename': log_path,
            'level': 'INFO',
            'formatter': 'detail',
            'encoding': 'utf-8',  # utf-8 so non-ASCII messages do not fail to encode
        },
    },
    'loggers': {
        # Kept unchanged for any code that asks for getLogger('logger').
        'logger': {
            'handlers': ['console'],
            'level': 'DEBUG',
        },
        # The logger this module actually exports. Previously it had no entry
        # here, so the 'file' handler above was never attached to anything.
        # Console output for it is added by coloredlogs.install() below.
        'log': {
            'handlers': ['file'],
            'level': 'DEBUG',
        },
    }
}
log_conf.dictConfig(log_config)
log_v = logging.getLogger('log')
coloredlogs.install(level='DEBUG', logger=log_v)

# Usage examples:
# log_v.debug("this is a debugging message")
# log_v.info("this is an informational message")
# log_v.warning("this is a warning message")
# log_v.error("this is an error message")
這里準備好了一份我常用的日志配置文件,可作為常用的日志格式,直接調用即可,根據不同的等級來輸出到終端或 .log 文件,拿走不謝。
3 路由
對于 Flask 項目而言,藍圖和路由會讓整個項目更具觀賞性(當然指的是代碼的閱讀)。
這里我采用兩個分支來作為數據支撐,一個是 Math 入口,另一個是 Baike 入口,數據的來源是基于上一篇的百度百科爬蟲所得,根據 深度優先 的爬取方式抓取后放入 ES 中。
# coding:utf8
"""Application package: creates the Flask app, its extensions and blueprints."""
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from app.config.config import Config
from flask_mail import Mail
from flask_wtf.csrf import CSRFProtect

app = Flask(__name__, template_folder='templates', static_folder='static')
app.config.from_object(Config)

# Passing the app to the constructor already initializes the extension.
# The extra db.init_app(app) that used to follow registered it a second time,
# which newer Flask-SQLAlchemy releases reject with a RuntimeError.
db = SQLAlchemy(app)
csrf = CSRFProtect(app)
mail = Mail(app)

# Do not import the blueprints before `db` exists: the blueprint packages are
# loaded here, after the extensions above have been created.
from app.home.baike import baike as baike_blueprint
from app.home.math import math as math_blueprint
from app.home.home import home as home_blueprint

app.register_blueprint(home_blueprint)
app.register_blueprint(math_blueprint, url_prefix="/math")
app.register_blueprint(baike_blueprint, url_prefix="/baike")
# -*- coding:utf-8 -*-
"""Package initializer that declares the ``baike`` blueprint."""
from flask import Blueprint

baike = Blueprint(name="baike", import_name=__name__)

# Loaded last on purpose: the views module imports `baike` from this package,
# so the blueprint object has to exist before the views are imported.
from app.home.baike import views
# -*- coding:utf-8 -*-
"""Package initializer that declares the ``math`` blueprint."""
from flask import Blueprint

math = Blueprint(name="math", import_name=__name__)

# Loaded last on purpose: the blueprint object is created before its views
# module is imported (presumably the views import `math` from this package,
# as the baike views do — confirm).
from app.home.math import views
聲明路由并在 __init__ 文件中初始化
下面來看看路由的實現(以 Baike 為例)
- # -*- coding:utf-8 -*-
- import os
- from flask_paginate import Pagination, get_page_parameter
- from app.Logger.logger import log_v
- from app.elasticsearchClass import elasticSearch
- from app.home.forms import SearchForm
- from app.home.baike import baike
- from flask import request, jsonify, render_template, redirect
# Module-level search client shared by every view of the baike blueprint.
# NOTE(review): elasticSearch is a project class not shown here; presumably
# index_name/index_type select the Elasticsearch index and doc type — confirm.
baike_es = elasticSearch(index_type="baike_data",index_name="baike")
@baike.route("/")
def index():
    """Render the Baike landing page with an empty search form."""
    return render_template('baike/index.html', searchForm=SearchForm())
@baike.route("/search", methods=['GET', 'POST'])
def baikeSearch():
    """Search the Baike index and render one page of results.

    Reads the keyword from the ``b`` query parameter and the page number from
    the flask_paginate page parameter. Without a keyword the user is sent back
    to the home page.
    """
    # Local import keeps this fix self-contained; flask is already a
    # dependency of this module.
    from flask import url_for

    search_key = request.args.get("b", default=None)
    if not search_key:
        # redirect() expects a URL. The original passed the endpoint name
        # 'home.index' directly, which the browser resolved as a relative
        # path. NOTE(review): assumes the home blueprint exposes an `index`
        # endpoint, as that original string implies — confirm.
        return redirect(url_for('home.index'))

    searchForm = SearchForm()
    # A normal search is not an error; log it at INFO.
    log_v.info("[+] Search Keyword: %s", search_key)
    match_data = baike_es.search(search_key, count=30)
    hits = match_data["hits"]["hits"]

    # Pagination
    PER_PAGE = 10
    page = request.args.get(get_page_parameter(), type=int, default=1)
    page = max(page, 1)  # page <= 0 would produce negative slice bounds
    start = (page - 1) * PER_PAGE
    end = start + PER_PAGE
    # Use the number of hits actually returned instead of a hard-coded 30,
    # so the pager never links to empty pages.
    total = len(hits)
    print("最大數(shù)據(jù)總量:", total)
    pagination = Pagination(page=page, per_page=PER_PAGE, total=total)
    context = {
        'match_data': hits[start:end],
        'pagination': pagination,
        'uid_link': "/baike/"
    }
    return render_template('data.html', q=search_key, searchForm=searchForm, **context)
@baike.route('/<uid>')
def baikeSd(uid):
    """Render the detail page for the document identified by *uid*.

    The detail-template directory is meant to hold a single HTML file. When
    ``<uid>.html`` is not there yet, the existing template is renamed to it
    and then rendered with the document fetched from Elasticsearch.
    """
    # Local import keeps this fix self-contained; flask is already a
    # dependency of this module.
    from flask import abort

    # uid comes straight from the URL and is used to build a filesystem path:
    # refuse anything that is not a plain file name.
    if os.path.basename(uid) != uid or '\\' in uid:
        abort(404)

    base_path = os.path.abspath('app/templates/s_d/')
    file_path = os.path.join(base_path, '{}.html'.format(uid))
    if not os.path.exists(file_path):
        # Only consider real templates, in a stable order. The original took
        # os.listdir(...)[0], which raises IndexError on an empty directory
        # and may pick a stray non-template file.
        candidates = sorted(
            entry for entry in os.listdir(base_path) if entry.endswith('.html'))
        if not candidates:
            log_v.error("[-] No detail template found in %s", base_path)
            abort(500)
        log_v.debug("[-] File does not exist, renaming !!!")
        os.rename(os.path.join(base_path, candidates[0]), file_path)
    match_data = baike_es.id_get_doc(uid=uid)
    return render_template('s_d/{}.html'.format(uid), match_data=match_data)
可以看到我們成功地將 elasticSearch 類初始化并且進行了數據搜索。
我們使用了 Flask 的分頁插件進行分頁并進行了單頁數量的限制,根據 Uid 來跳轉到詳情頁中。
細心的小伙伴會發現我這里用了個小技巧
@baike.route('/<uid>')
def baikeSd(uid):
    """Render the detail page for the document identified by *uid*.

    The detail-template directory is meant to hold a single HTML file. When
    ``<uid>.html`` is not there yet, the existing template is renamed to it
    and then rendered with the document fetched from Elasticsearch.
    """
    # Local import keeps this fix self-contained; flask is already a
    # dependency of this module.
    from flask import abort

    # uid comes straight from the URL and is used to build a filesystem path:
    # refuse anything that is not a plain file name.
    if os.path.basename(uid) != uid or '\\' in uid:
        abort(404)

    base_path = os.path.abspath('app/templates/s_d/')
    file_path = os.path.join(base_path, '{}.html'.format(uid))
    if not os.path.exists(file_path):
        # Only consider real templates, in a stable order. The original took
        # os.listdir(...)[0], which raises IndexError on an empty directory
        # and may pick a stray non-template file.
        candidates = sorted(
            entry for entry in os.listdir(base_path) if entry.endswith('.html'))
        if not candidates:
            log_v.error("[-] No detail template found in %s", base_path)
            abort(500)
        log_v.debug("[-] File does not exist, renaming !!!")
        os.rename(os.path.join(base_path, candidates[0]), file_path)
    match_data = baike_es.id_get_doc(uid=uid)
    return render_template('s_d/{}.html'.format(uid), match_data=match_data)
以此來保證存放詳情頁面的模板目錄中始終只保留一個 html 文件。
4 項目啟動
一如既往地采用 flask_script 作為項目的啟動方案,確實方便。
# coding:utf8
"""Command-line entry point: ``python manage.py runserver``."""
from app import app
from flask_script import Manager, Server

manage = Manager(app)

# Register the development server (with the interactive debugger enabled)
# under the "runserver" command.
dev_server = Server(use_debugger=True)
manage.add_command("runserver", dev_server)

if __name__ == "__main__":
    manage.run()
黑窗口鍵入
- python manage.py runserver
就可以啟動(dòng)項(xiàng)目,默認(rèn)端口 5000,訪問(wèn) http://127.0.0.1:5000
使用gunicorn啟動(dòng)
- gunicorn -c gconfig.py manage:app
# encoding:utf-8
"""gunicorn configuration, used as ``gunicorn -c gconfig.py manage:app``."""
import multiprocessing
import os

from gevent import monkey

monkey.patch_all()

# Number of parallel worker processes
workers = multiprocessing.cpu_count() * 2 + 1
debug = True
reload = True  # reload automatically
loglevel = 'debug'
# Number of threads per worker
threads = 2
# Listen on all interfaces, port 5001 (the old comment said 8000)
bind = '0.0.0.0:5001'
# Daemon mode is off: process management is left to supervisor.
# A real boolean replaces the string 'false', which only worked because
# gunicorn parses that particular string.
daemon = False
# Worker model: coroutines (gevent)
worker_class = 'gevent'
# Maximum number of concurrent connections
worker_connections = 2000
# Make sure the directory used by the pid and log files below exists, so
# gunicorn can open them on a fresh checkout.
os.makedirs('log', exist_ok=True)
# Location of the pid file
pidfile = 'log/gunicorn.pid'
logfile = 'log/debug.log'
# Paths of the access log and the error log
accesslog = 'log/gunicorn_acess.log'
errorlog = 'log/gunicorn_error.log'
項目截圖
項目 GitHub 地址
https://github.com/GZKY-PY/Flask-ES