我将电子邮件归档到一堆EML文件中,但是当这些文件作为纯文本文件打开时,内容是不可读的。
我想知道是否有一个转换器(必须是免费的)可以将EML文件批量转换为包含主题、内容等的纯文本文件(附件可以忽略)。
发布于 2016-03-06 21:07:53
我从这里找到了一个基本可用的脚本,并对其进行了修改,以满足您的需要。
必须安装Python 2.7才能运行脚本。安装Python时,请确保选择Add python.exe to Path。
接下来,将脚本保存为[whatever name you want].py(文件名可任意),并将其放入与所有.eml文件相同的文件夹中。然后运行该脚本,就这样。
它将创建一个名为Email1的文件夹(如果该文件夹已存在,则依次使用Email2、Email3……)。该文件夹下会为每个.eml文件创建一个同名的子文件夹。在每个子文件夹中,您可以找到所有附件以及一个包含全部电子邮件信息的.txt文件:
请注意,有些电子邮件可能包含HTML。此脚本确实捕获了HTML,但它将其保存为纯文本,因此您必须复制粘贴.txt文件的HTML部分,将其保存为.htm,并使用任何web浏览器打开它。
要求:
该脚本如下:
from email import message_from_file
import os
def file_exists(f):
    """Return True if *f* already exists inside the current output folder.

    The output folder is taken from the module-level ``path`` variable,
    which the script's entry point sets before each message is processed.
    """
    candidate = os.path.join(path, f)
    # Normalise Windows separators so the path uses forward slashes only.
    normalized = candidate.replace("\\", "/")
    return os.path.exists(normalized)
def save_file(fn, cont):
    """Write the binary content *cont* to the file *fn* in the output folder.

    The output folder is taken from the module-level ``path`` variable.
    The file is opened in binary mode and is always closed, even when the
    write fails.
    """
    target = os.path.join(path, fn).replace("\\", "/")
    # ``with`` guarantees the handle is released if write() raises.
    with open(target, "wb") as out:
        out.write(cont)
def construct_name(id, fn):
    """Build the on-disk file name for an attachment.

    The result is the first two dot-separated pieces of *id* glued
    together, followed by ``"."`` and the attachment name, e.g.
    ``construct_name("mail.eml", "a.txt") == "maileml.a.txt"``.

    Parameters:
        id: key identifying the message (the entry point passes the .eml
            file name). The parameter name is kept for compatibility
            even though it shadows the builtin.
        fn: attachment file name as found in the message.

    Returns:
        A single path component that is safe to join to the output folder.
    """
    # Attachment names come from the (untrusted) message; keep only the
    # final path component so a name such as "../../x" cannot point
    # outside the output folder.
    safe_fn = os.path.basename(fn.replace("\\", "/")) or "attachment"
    pieces = id.split(".")
    # Slicing instead of indexing avoids an IndexError when *id* has no dot.
    prefix = "".join(pieces[:2])
    return prefix + "." + safe_fn
def disqo(s):
    """Strip surrounding whitespace and one pair of matching quotes from *s*.

    Both single and double quotes are recognised; the text is returned
    unchanged (apart from the whitespace strip) when it is not quoted.
    """
    trimmed = s.strip()
    for quote in ("'", '"'):
        if trimmed.startswith(quote) and trimmed.endswith(quote):
            return trimmed[1:-1]
    return trimmed
def disgra(s):
    """Strip surrounding whitespace and one enclosing ``<...>`` pair from *s*."""
    stripped = s.strip()
    bracketed = stripped.startswith("<") and stripped.endswith(">")
    return stripped[1:-1] if bracketed else stripped
def _payload_text(part):
    """Return the transfer-decoded body of a leaf part as a native ``str``."""
    raw = part.get_payload(decode=True)
    if raw is None:
        return ""
    if isinstance(raw, str):
        # Python 2: the decoded payload is already a native str.
        return raw
    # Python 3: the decoded payload is bytes and must be turned into text
    # before it can be concatenated with the other text parts.
    charset = part.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, "replace")
    except LookupError:
        # Unknown charset label in the message; fall back to UTF-8.
        return raw.decode("utf-8", "replace")


def _name_from_content_type(header):
    """Return the unquoted ``name=`` value of a Content-Type header, or None."""
    start = header.find("name=")
    if start == -1:
        return None
    end = header.find(";", start)
    if end == -1:
        end = None
    return disqo(header[start + len("name="):end])


def _store_attachment(part, key, fn, content_id):
    """Save *part* under its constructed name (unless present) and describe it."""
    cfn = construct_name(key, fn)
    if not file_exists(cfn):
        save_file(cfn, part.get_payload(decode=True))
    return {fn: (cfn, content_id)}


def pullout(m, key):
    """Recursively collect the text, HTML and attachments of message *m*.

    Parameters:
        m: an ``email.message.Message`` (a whole message or one of its parts).
        key: identifier passed to ``construct_name`` to build the names of
            saved attachments.

    Returns:
        A tuple ``(Text, Html, Files, Parts)``: the concatenated
        ``text/plain`` bodies, the concatenated ``text/html`` bodies, a
        dict mapping each attachment name to ``(saved_name, content_id)``,
        and the number of leaf parts visited. Attachments are written to
        disk as a side effect; an attachment whose target file already
        exists is not rewritten.
    """
    if m.is_multipart():
        texts, htmls, Files, Parts = [], [], {}, 0
        # A multipart payload is a list of sub-messages; walk each of them.
        for part in m.get_payload():
            t, h, f, p = pullout(part, key)
            texts.append(t)
            htmls.append(h)
            Files.update(f)
            Parts += p
        return "".join(texts), "".join(htmls), Files, Parts

    # A leaf part that carries a file name is treated as an attachment
    # regardless of its content type.
    fn = m.get_filename()
    if fn:
        return "", "", _store_attachment(m, key, fn, None), 1

    ctype = m.get_content_type()
    if ctype == "text/plain":
        return _payload_text(m), "", {}, 1
    if ctype == "text/html":
        return "", _payload_text(m), {}, 1

    # Any other leaf: it is saved only when its Content-Type header
    # carries a name= parameter.
    fn = _name_from_content_type(str(m.get("content-type") or ""))
    if fn is None:
        return "", "", {}, 1
    raw_id = m.get("content-id")
    content_id = disgra(raw_id) if raw_id is not None else None
    return "", "", _store_attachment(m, key, fn, content_id), 1
def extract(msgfile, key):
    """Parse an open .eml file and return its contents as a dict.

    Parameters:
        msgfile: an open file object for the message, opened in either
            text or binary mode.
        key: identifier forwarded to ``pullout`` for naming attachments.

    Returns:
        A dict with the keys ``"subject"``, ``"from"``, ``"to"``,
        ``"date"``, ``"text"``, ``"html"`` and ``"parts"``; the key
        ``"files"`` is present only when the message has attachments.
    """
    import email

    data = msgfile.read()
    # On Python 3 a file opened with "rb" yields bytes, which the
    # str-based parser rejects; on Python 2 bytes is str, so the string
    # parser is always used there.
    if isinstance(data, bytes) and not isinstance(data, str):
        m = email.message_from_bytes(data)
    else:
        m = email.message_from_string(data)

    From, To, Subject, Date = caption(m)
    Text, Html, Files, Parts = pullout(m, key)
    msg = {
        "subject": Subject,
        "from": From,
        "to": To,
        "date": Date,
        "text": Text.strip(),
        "html": Html.strip(),
        "parts": Parts,
    }
    if Files:
        msg["files"] = Files
    return msg
def caption(origin):
    """Return the ``(From, To, Subject, Date)`` headers of message *origin*.

    Each value is stripped of surrounding whitespace; a header that is
    missing from the message is returned as an empty string.
    """
    def header(name):
        # Message.has_key() does not exist on Python 3; get() works on
        # both Python 2 and 3 and returns None for a missing header.
        value = origin.get(name)
        return "" if value is None else value.strip()

    return header("from"), header("to"), header("subject"), header("date")
def main():
    """Convert every .eml file in the current directory to a text report.

    A fresh output folder ``Email<N>`` is created (the first N >= 1 that
    does not exist yet). For each ``<name>.eml`` a subfolder ``<name>`` is
    created in it, holding the saved attachments and ``txt_<name>.txt``
    with the headers, attachment names, part count, plain text and HTML.
    """
    # file_exists() and save_file() read the current output folder from
    # this module-level variable.
    global path

    startdirname = "Email"
    num = 1
    # Find the first unused Email<N> folder without building a huge range.
    while os.path.exists(startdirname + str(num)):
        num += 1
    outroot = "./" + startdirname + str(num)
    os.makedirs(outroot)

    for entry in os.listdir("."):
        if not entry.endswith(".eml"):
            continue
        nam = entry[:-4]
        path = outroot + "/" + nam
        os.makedirs(path)

        with open(entry, "rb") as f:
            emailDict = extract(f, f.name)

        # extract() omits "files" when the message has no attachments, so
        # a plain lookup would raise KeyError for such messages.
        files = list(emailDict.get("files", {}))

        textFile = "".join([
            "From: " + emailDict["from"] + "\n",
            "To: " + emailDict["to"] + "\n",
            "Subject: " + emailDict["subject"] + "\n",
            "Date: " + emailDict["date"] + "\n\n",
            "Files: " + ", ".join(files) + "\n",
            "Parts: " + str(emailDict["parts"]) + "\n\n",
            "Text:\n\n" + emailDict["text"] + "\n\n",
            "HTML:\n\n" + emailDict["html"],
        ])

        with open(path + "/" + "txt_" + nam + ".txt", "w") as wf:
            wf.write(textFile)


if __name__ == "__main__":
    main()
# Answer author's note: just realized how big this got. Hope you still need it!
发布于 2016-01-19 18:22:56
https://softwarerecs.stackexchange.com/questions/28138
复制相似问题