Python分割器教你給文章做手術
作者:佚名
Python分割器能夠幫助我們把長文章進行分割。但是要如何才能熟練使用呢?下面我們就來詳細地學習相關的操作過程。
Python分割器在我們進行文章分割的時候會經常用到。當然一篇相當長的文章會讓你有些頭疼。看完下面的代碼希望大家能夠熟練地使用Python分割器進行文章分割。
- # 將txt小說分割轉(zhuǎn)換成多個HTML文件
- # @author : GreatGhoul
- # @email : greatghoul@gmail.com
- # @blog : http://greatghoul.javaeye.com
- import re
- import os
# Stricter alternative heading pattern that also requires a "第…章" part
# (kept for reference, not used):
#   r'第.+卷\s+.+\s+第.+章\s+.+'

# Path of the plain-text book to convert.
source_path = 'f:\\傭兵天下.txt'
path_pieces = os.path.split(source_path)

# Book title = file name without its extension.  splitext() only drops the
# final extension, so a title that itself contains a dot is not truncated.
novel_title = os.path.splitext(path_pieces[1])[0]

# Output folder "<title>_html" next to the source file.  os.path.join()
# inserts the separator that plain string concatenation omitted whenever the
# book does not live directly in a drive root.
target_path = os.path.join(path_pieces[0], '%s_html' % novel_title)

# A line is treated as a section heading when it starts (after optional
# whitespace) with "第…卷" followed by whitespace.
section_re = re.compile(r'^\s*第.+卷\s+.*$')

# Page header template; takes two substitutions: (<title> text, <h2> text).
# The Content-Type meta needs the full "text/html; charset=..." form for the
# charset to be recognised by browsers.
section_head = '''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK"/>
<title>%s</title>
</head>
<body style="font-family:楷體,宋體;font-size:16px;margin:0;
padding: 20px; background:#FAFAD2;color:#2B4B86;text-align:center;">
<h2>%s</h2><a href="#bottom">去頁尾</a><hr/>'''
# escape xml/html
def escape_xml(code):
    """Return *code* escaped for use as HTML paragraph text.

    The listing this function came from had its entities decoded (every
    substitution replaced a character with itself or with a plain space), so
    the entity names are restored here:

    * ``&``, ``<`` and ``>`` become ``&amp;``, ``&lt;`` and ``&gt;``.  The
      ampersand is handled first so the entities produced for ``<``/``>``
      are not escaped a second time.
    * A tab becomes four ``&nbsp;`` (the width of four is an assumption).
    * Every remaining ASCII whitespace character, including a trailing
      newline, becomes one ``&nbsp;`` so runs of spaces survive HTML
      whitespace collapsing.

    :param code: raw text line.
    :returns: the escaped text.
    """
    text = code.replace('&', '&amp;')
    text = text.replace('<', '&lt;').replace('>', '&gt;')
    text = text.replace('\t', '&nbsp;' * 4)
    # Explicit ASCII class rather than \s: on unicode strings \s would also
    # match wide characters such as the ideographic space.
    return re.sub(r'[ \n\r\f\v]', '&nbsp;', text)
def _page_path(name):
    """Return the path of the output file *name* inside ``target_path``."""
    return os.path.join(target_path, name)


def _section_footer(sec_count, has_next):
    """Return the HTML fragments that close section number *sec_count*.

    The "previous" link is omitted for section 0 and the "next" link is
    omitted when *has_next* is false, so no link points at a page that is
    never written.
    """
    parts = ['<hr/><p>']
    if sec_count > 0:
        parts.append('<a href="%d.html">上一篇</a> | ' % (sec_count - 1))
    parts.append('<a href="index.html">目錄</a> | ')
    if has_next:
        parts.append('<a href="%d.html">下一篇</a> | ' % (sec_count + 1))
    parts.append('<a name="bottom" href="#">回頁首</a></p>')
    parts.append('</body></html>')
    return parts


def _index_entry(sec_count, title):
    """Return the table-of-contents list item linking to a section page."""
    return '<li><a href="%d.html">%s</a></li>' % (sec_count, title)


# entry of the script
def main():
    """Split the book at ``source_path`` into one HTML page per section.

    Page ``0.html`` holds everything before the first heading (the preface);
    each line matching ``section_re`` starts the next numbered page.  An
    ``index.html`` listing all pages is written last.  Blank lines are
    skipped; every other line becomes a left-aligned paragraph.
    """
    # create the output folder
    if not os.path.exists(target_path):
        os.mkdir(target_path)

    sec_count = 0
    sec_cache = []
    preface_title = '%s 前言' % novel_title
    idx_cache = [_index_entry(sec_count, novel_title)]

    with open(source_path, 'r') as source:
        output = open(_page_path('%d.html' % sec_count), 'w')
        # try/finally because the handle is swapped at every heading; the
        # page that is open when an error occurs still gets closed.
        try:
            output.write(section_head % (preface_title, preface_title))
            for line in source:
                if line.strip() == '':
                    continue
                if not section_re.match(line):
                    sec_cache.append('<p style="text-align:left;">%s</p>'
                                     % escape_xml(line))
                    continue

                # A heading: finish the current page, then start a new one.
                line = re.sub(r'\s+', ' ', line)
                print('converting %s...' % line)
                output.writelines(sec_cache + _section_footer(sec_count, True))
                output.close()
                sec_cache = []
                sec_count += 1

                output = open(_page_path('%d.html' % sec_count), 'w')
                output.write(section_head % (line, line))
                idx_cache.append(_index_entry(sec_count, line))

            # Flush the last section; nothing follows it, so no "next" link.
            output.writelines(sec_cache + _section_footer(sec_count, False))
        finally:
            output.close()

    # write the menu
    menu_head = '%s 目錄' % novel_title
    with open(_page_path('index.html'), 'w') as index:
        index.write(section_head % (menu_head, menu_head))
        index.write('<ul style="text-align:left">')
        index.writelines(idx_cache)
        index.write('</ul></body></html>')

    print('completed. %d chapter(s) in total.' % sec_count)


if __name__ == '__main__':
    main()
以上就是對Python分割器的相關介紹,希望大家有所收獲。
【編輯推薦】
責任編輯:張浩
來源:
CSDN