"""Convert exported QQ chat logs into a plain-text training corpus.

Every file found under ``download/QQMsg`` is read, its fixed-size export
header is skipped, and the remaining lines are written to
``data/train_qq.txt``: one message per line, with an empty line inserted
wherever two consecutive timestamp lines are more than two minutes apart
(or go backwards in time), which is treated as the start of a new
conversation.
"""
import os
from datetime import datetime
from itertools import islice
from typing import Iterable, Optional, TextIO

SOURCE_DIR = "download/QQMsg"
OUTPUT_PATH = "data/train_qq.txt"

# Number of leading lines in each exported file that are skipped.
HEADER_LINES = 8
# Timestamp lines further apart than this start a new conversation.
MAX_GAP_SECONDS = 120
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
# Sentinel "previous timestamp" used before the first timestamp of a file.
EPOCH = datetime.strptime("1970-1-1 00:00:00", TIMESTAMP_FORMAT)
# Placeholder texts that are stripped from messages before writing.
PLACEHOLDERS = ("[图片]", "[表情]", "[合并转发]请使用手机QQ最新版本查看")


def _parse_timestamp(raw: str) -> Optional[datetime]:
    """Return the timestamp carried by *raw*, or ``None`` for a message line.

    The line is split on spaces into at most three fields.  If the first
    field starts with ``'2'`` the date and time are taken from fields 0 and
    1; otherwise they are taken from fields 1 and 2.
    """
    fields = raw.split(" ", 2)
    try:
        if fields[0][0] == "2":
            stamp = fields[0] + " " + fields[1]
        else:
            stamp = fields[1] + " " + fields[2]
        return datetime.strptime(stamp, TIMESTAMP_FORMAT)
    except (IndexError, ValueError):
        # Too few fields, an empty line, or text that is not a date/time:
        # this is an ordinary message line.
        return None


def _clean_message(raw: str) -> str:
    """Return *raw* with all placeholder texts removed."""
    for placeholder in PLACEHOLDERS:
        raw = raw.replace(placeholder, "")
    return raw


def convert_lines(
    lines: Iterable[str],
    output_file: TextIO,
    header_lines: int = HEADER_LINES,
) -> None:
    """Write the messages found in *lines* to *output_file*.

    Args:
        lines: Lines of one exported chat file, line endings included.
        output_file: Writable text stream receiving the converted text.
        header_lines: Number of leading lines to skip.
    """
    last_time = EPOCH
    for line in islice(lines, header_lines, None):
        raw = line.replace("\r\n", "").replace("\n", "")

        timestamp = _parse_timestamp(raw)
        if timestamp is None:
            message = _clean_message(raw)
            if message != "":
                output_file.write(message + "\n")
            continue

        # total_seconds() is required here: timedelta.seconds is only the
        # 0..86399 component, so it is never negative and it ignores whole
        # days (a gap of one day and 30 seconds would look like 30 seconds).
        gap = (timestamp - last_time).total_seconds()
        if gap > MAX_GAP_SECONDS or gap < 0:
            # More than two minutes apart (or out of order): treat as a
            # different conversation and separate it with an empty line.
            output_file.write("\n")
        last_time = timestamp


def main(source_dir: str = SOURCE_DIR, output_path: str = OUTPUT_PATH) -> None:
    """Convert every file under *source_dir* into the single *output_path*.

    Args:
        source_dir: Directory tree that is walked for exported chat files.
        output_path: File that is (over)written with the converted text.
    """
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as output_file:
        for path, _dir_list, file_list in os.walk(source_dir):
            for file_name in file_list:
                file_path = os.path.join(path, file_name)
                print(file_path)
                with open(file_path, "r", encoding="utf-8") as chat_file:
                    convert_lines(chat_file, output_file)


if __name__ == "__main__":
    main()