This repository has been archived on 2026-03-12. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
nlp-chatbot/qqmsg_process.py
2026-03-12 11:09:11 +08:00

41 lines
1.6 KiB
Python

import os
from datetime import datetime
# Directory tree that is walked recursively for chat-log files.
INPUT_ROOT = "download/QQMsg"
# File that receives the generated training corpus (overwritten on every run).
OUTPUT_PATH = "data/train_qq.txt"
# Number of leading lines skipped in every log file
# (presumably the export header -- confirm against a sample log).
HEADER_LINE_COUNT = 8
# Timestamps further apart than this many seconds start a new conversation.
CONVERSATION_GAP_SECONDS = 120
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
# Markers stripped from message lines, in this order.
PLACEHOLDERS = ("[图片]", "[表情]", "[合并转发]请使用手机QQ最新版本查看")


def parse_timestamp(raw):
    """Return the timestamp carried by ``raw``, or ``None`` for a message line.

    ``raw`` is split on spaces into at most three fields. If the first field
    starts with ``"2"`` the timestamp is read from fields 0 and 1; otherwise
    it is read from fields 1 and 2.

    Args:
        raw: One log line with its line terminator already removed.

    Returns:
        The parsed ``datetime``, or ``None`` when the line has too few fields
        or the selected fields do not match ``TIMESTAMP_FORMAT``.
    """
    fields = raw.split(" ", 2)
    try:
        if fields[0][0] == "2":
            stamp = fields[0] + " " + fields[1]
        else:
            stamp = fields[1] + " " + fields[2]
        return datetime.strptime(stamp, TIMESTAMP_FORMAT)
    except (IndexError, ValueError):
        # IndexError: empty line or too few fields; ValueError: not a date.
        # Either way this line is message text, not a timestamp.
        return None


def iter_training_lines(lines):
    """Yield the corpus lines produced from the chat lines of one log file.

    Timestamp lines are never emitted themselves. A timestamp that is more
    than ``CONVERSATION_GAP_SECONDS`` after the previous one, or earlier than
    it, yields a bare ``"\\n"`` as a conversation separator. Every other line
    is a message: placeholders are stripped and, if anything is left, the
    text is yielded followed by ``"\\n"``.

    Args:
        lines: Iterable of raw log lines (line terminators allowed), with the
            skipped leading lines already removed.

    Yields:
        Strings ready to be written to the output file.
    """
    # Epoch start guarantees the first timestamp of a file opens a conversation.
    last_time = datetime(1970, 1, 1)
    for line in lines:
        raw = line.replace("\r\n", "").replace("\n", "")
        timestamp = parse_timestamp(raw)
        if timestamp is not None:
            # total_seconds() covers the whole interval and can be negative;
            # timedelta.seconds is only the 0..86399 remainder, which hid
            # gaps of whole days and made the "< 0" test unreachable.
            gap = (timestamp - last_time).total_seconds()
            if gap > CONVERSATION_GAP_SECONDS or gap < 0:
                yield "\n"
            last_time = timestamp
            continue
        message = raw
        for placeholder in PLACEHOLDERS:
            message = message.replace(placeholder, "")
        if message:
            yield message + "\n"


def main():
    """Convert every log under ``INPUT_ROOT`` into ``OUTPUT_PATH``.

    Prints the path of each file as it is processed. The output file is
    truncated and rewritten; its parent directory is created when missing.
    """
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as output_file:
        for path, _dir_list, file_list in os.walk(INPUT_ROOT):
            for file_name in file_list:
                log_path = os.path.join(path, file_name)
                print(log_path)
                # "with" closes the log even if reading or decoding fails.
                with open(log_path, "r", encoding="utf-8") as log_file:
                    lines = log_file.readlines()
                output_file.writelines(
                    iter_training_lines(lines[HEADER_LINE_COUNT:])
                )


# Called unconditionally so that running (or importing) this file still
# performs the conversion, exactly as the original flat script did.
main()