一个小工具:发现有一个开源的诗词库( https://github.com/michaelliao/shici/tree/master/web/src/main/resources/text ),考虑将其导入到数据库中。诗词库是通过文件夹方式组织的,所以用 Python 很快就实现了这个需求。主要功能包括:递归枚举文件夹,然后将文件内容读出来并写入数据库。
代码记录如下:
# Import dependencies.
import io
import os
import sys
from datetime import datetime

import pymysql


def _read_filtered(path, skip_markers, strip_newlines):
    """Return the text of ``path`` with header and blank lines removed.

    A line is dropped when it contains any substring from ``skip_markers``
    or when it is empty after stripping whitespace. When ``strip_newlines``
    is true, newline characters are stripped from both ends of every kept
    line before the lines are concatenated.
    """
    kept = []
    # io.open accepts ``encoding`` on both Python 2 and Python 3, so the
    # files are decoded as UTF-8 regardless of the platform locale, and the
    # ``with`` block closes the file even if reading fails.
    with io.open(path, 'r', encoding='utf-8') as handle:
        for line in handle:
            if any(marker in line for marker in skip_markers):
                continue
            if line.strip() == '':
                continue
            kept.append(line.strip('\n') if strip_newlines else line)
    return ''.join(kept)


def readpoem(path):
    """Read a poem file and return its body text.

    Lines containing ``form`` or ``tags`` and blank lines are skipped; the
    remaining lines are returned unchanged, line breaks included.
    """
    return _read_filtered(path, ('form', 'tags'), False)


def readmeta(path):
    """Read a ``meta`` file and return its text joined into one line.

    Lines containing ``birth`` or ``death`` and blank lines are skipped;
    the remaining lines have their newlines stripped and are concatenated.
    """
    return _read_filtered(path, ('birth', 'death'), True)


def _build_title(file_path):
    """Build a poem title from the last three components of ``file_path``.

    The extension is dropped first. The first component contributes the
    text between its first and second dot (for example ``1.abc`` gives
    ``abc``), wrapped in square brackets with single quotes removed; the
    second component is appended as is; any later component is appended
    with a leading ``-``.
    """
    stem = os.path.splitext(file_path)[0]
    # Split on the platform separator instead of a hard-coded '/', so the
    # same code works for paths produced by os.path.join on Windows.
    parts = os.path.normpath(stem).split(os.sep)[-3:]
    pieces = []
    for index, part in enumerate(parts):
        if index == 0:
            numbered = part.split('.')
            # Fall back to the whole name when there is no "<n>." prefix
            # rather than raising IndexError.
            label = numbered[1] if len(numbered) > 1 else part
            pieces.append('[{0}]'.format(label.replace('\'', '')))
        elif index == 1:
            pieces.append(part.strip())
        else:
            pieces.append('-{0}'.format(part.strip()))
    return ''.join(pieces)


def listdir(path, list_result):
    """Recursively collect poems and metadata found under ``path``.

    For every directory that yields data, one dict is appended to
    ``list_result`` (sub-directories are appended before their parent):

    * ``'meta'``: text returned by ``readmeta`` for a ``.txt`` file whose
      name contains ``meta`` (only set when that text is non-blank);
    * ``'poem'``: list of ``{'content': ..., 'title': ...}`` dicts, one per
      non-empty poem ``.txt`` file in that directory.

    Single quotes are removed from poem content. Nothing is returned; the
    results are delivered through ``list_result``.
    """
    result = {}
    contains_poem_file = False
    # Sorted so the traversal (and therefore the output order) is stable.
    for name in sorted(os.listdir(path)):
        file_path = os.path.join(path, name)
        if os.path.isdir(file_path):
            listdir(file_path, list_result)
            continue
        if os.path.splitext(file_path)[1] != '.txt':
            continue
        # Test only the file name: a parent directory that happens to
        # contain "meta" must not turn every poem into a metadata file.
        if 'meta' in name:
            meta = readmeta(file_path)
            if meta.strip() != '':
                result['meta'] = meta
            continue
        contains_poem_file = True
        poems = result.setdefault('poem', [])
        content = readpoem(file_path)
        if content.strip() == '':
            continue
        poems.append({
            'content': content.replace('\'', ''),
            'title': _build_title(file_path),
        })
    if contains_poem_file:
        print('poem.numb:{0}'.format(len(result['poem'])))
    if result:
        list_result.append(result)


# Encoding the driver code below expects as the interpreter default.
defaultencoding = 'utf-8'
# Python 2 workaround: force the interpreter's default encoding to UTF-8 so
# implicit str/unicode conversions do not fail on non-ASCII text. On
# Python 3 the default is already 'utf-8', so this branch is never entered.
if sys.getdefaultencoding() != defaultencoding:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding(defaultencoding)

# Scan the directory that contains this script.
result_poems = []
listdir(sys.path[0], result_poems)
print("size:%d" % len(result_poems))

# Write the collected poems to the database.
# NOTE(review): MySQL's 'utf8' charset stores at most 3 bytes per character;
# confirm whether the table needs 'utf8mb4' for rare characters.
conn = pymysql.connect(host='localhost', port=3306, user='root',
                       passwd='python', db='myschool', charset='utf8')
cur = conn.cursor()
dt = datetime.now()
# Kept numeric so the driver renders it as an unquoted literal, exactly as
# the previous string-formatted statement did.
now = int(dt.strftime('%Y%m%d%H'))
# Placeholders are filled in by the driver, which escapes the values; this
# replaces hand-built SQL that broke on quotes and allowed SQL injection.
sql = "insert into shici(`name`,`content`,`time`) values (%s, %s, %s)"

try:
    for item in result_poems:
        print(item.get('meta', ''))
        # A directory may contribute metadata only, so 'poem' can be absent.
        for poem in item.get('poem', []):
            title = poem.get('title')
            content = poem.get('content')
            if title is None or content is None:
                # Skip incomplete entries instead of aborting the import.
                continue
            print('name:{0}, content:{1}'.format(title, content))
            cur.execute(sql, (title, content, now))
    conn.commit()
except Exception:
    # Undo the partial import and surface the error instead of hiding it.
    conn.rollback()
    raise
finally:
    cur.close()
    conn.close()
-------------------广告线---------------
项目、合作,欢迎勾搭,邮箱:promall@qq.com
本文为呱牛笔记原创文章,转载无需和我联系,但请注明来自呱牛笔记 ,it3q.com