需要分析邮件数据,因此先将邮件保存为本地的 HTML 文件。
将 Windows 端的 HTML 文件上传到 Linux 后,通过 vim 的 `:set fileencoding` 命令查看,发现该文档是 utf-16-le 编码的,因此读取时需要显式指定这一编码:
import os
import codecs
from bs4 import BeautifulSoup
def parseFile(filepath, encoding='utf-16-le'):
    """Parse an HTML file with BeautifulSoup and print the parsed document.

    The file is decoded with ``encoding`` while reading, handed to
    BeautifulSoup using the ``lxml`` parser, and the resulting soup is
    printed to standard output.

    Args:
        filepath: Path of the HTML file to read.
        encoding: Text encoding used to decode the file. Defaults to
            ``'utf-16-le'``, the value that was previously hard-coded.

    Returns:
        None. The parsed document is only printed.

    Any exception raised while opening, decoding, parsing or printing is
    caught and reported as a single ``[ERROR]-- <message>`` line instead of
    propagating; this best-effort behaviour is kept from the original.
    """
    try:
        # Open the file once, already decoding it; the earlier extra plain
        # open() of the same path was never used.
        with codecs.open(filepath, 'r', encoding) as fp:
            soup = BeautifulSoup(fp, 'lxml')
        # Printing stays inside the try block so that output-encoding errors
        # are reported the same way as read/parse errors.
        print(soup)
    except Exception as ex:
        # Single-argument print() emits the same text on Python 2 and 3.
        print('[ERROR]-- %s' % (ex,))
# Script entry point: parse the sample file sitting next to the script.
if __name__ == '__main__':
    parseFile('./Signature.txt')
网友评论