初始化
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
# Flatten the chat-log files under download/QQMsg into a single text corpus,
# data/train_qq.txt: one message per line, with a blank line between
# conversations. A new conversation starts whenever the gap to the previous
# timestamp exceeds two minutes (or the timestamps go backwards).
files = os.walk("download/QQMsg")

with open("data/train_qq.txt", "w", encoding="utf-8") as outputFile:
    for path, dir_list, file_list in files:
        for file_name in file_list:
            file_path = os.path.join(path, file_name)
            print(file_path)
            with open(file_path, "r", encoding="utf-8") as f:
                lines = f.readlines()

            # Reset for every file so that the first timestamp found always
            # opens a new conversation.
            lastTime = datetime.strptime("1970-1-1 00:00:00", "%Y-%m-%d %H:%M:%S")

            # The first 8 lines of each file are skipped
            # (presumably the export header -- TODO confirm).
            for line in lines[8:]:
                raw = line.replace("\r\n", "").replace("\n", "")

                # Try to read the line as a timestamp header first; anything
                # that fails to parse is treated as message text.
                timeStrs = raw.split(' ', 2)
                try:
                    if timeStrs[0][0] == '2':
                        # Line starts with the date: "<date> <time> ...".
                        tsStr = timeStrs[0] + " " + timeStrs[1]
                    else:
                        # Otherwise take the 2nd field plus the remainder.
                        tsStr = timeStrs[1] + " " + timeStrs[2]
                    ts = datetime.strptime(tsStr, "%Y-%m-%d %H:%M:%S")
                except (IndexError, ValueError):
                    # Not a timestamp line, so it is a message. Strip the
                    # placeholders for images, stickers and forwarded bundles.
                    msg = raw.replace("[图片]", "").replace("[表情]", "").replace("[合并转发]请使用手机QQ最新版本查看", "")
                    if msg != "":
                        # Only non-empty lines are written out.
                        outputFile.write(msg + "\n")
                else:
                    # total_seconds() is required here: timedelta.seconds is
                    # only the 0..86399 remainder, so it ignores whole days
                    # and is never negative.
                    gap = (ts - lastTime).total_seconds()
                    if gap > 120 or gap < 0:
                        # More than 2 minutes apart: a different conversation.
                        outputFile.write("\n")
                    # NOTE(review): updated on every timestamp line so the gap
                    # is measured between consecutive messages -- the original
                    # indentation was lost, confirm this matches the intent.
                    lastTime = ts
|
||||
Reference in New Issue
Block a user