|
|
feedemail.py - zs - Zeitungsschau rss to email converter |
|
|
 |
git clone git://r-36.net/zs (git://r-36.net) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
feedemail.py (3777B) |
|
|
|
--- |
|
|
|
1 # |
|
|
|
2 # See LICENSE for licensing details. |
|
|
|
3 # |
|
|
|
4 # Copy me if you can. |
|
|
|
5 # by 20h |
|
|
|
6 # |
|
|
|
7 |
|
|
|
8 import smtplib |
|
|
|
9 from email.mime.text import MIMEText |
|
|
|
10 from email.mime.multipart import MIMEMultipart |
|
|
|
11 from email.utils import formataddr, formatdate, parseaddr |
|
|
|
12 from email.header import Header |
|
|
|
13 import time |
|
|
|
14 import subprocess |
|
|
|
15 import lxml.html |
|
|
|
16 import lxml.etree |
|
|
|
17 import urllib.parse |
|
|
|
18 |
|
|
|
19 import html2text |
|
|
|
20 |
|
|
|
def normalizeheader(hstr):
    """Reduce an HTML fragment to one line of plain text for a mail header.

    The fragment is parsed with lxml and its text content extracted; then
    non-breaking spaces (U+00A0) are removed, newlines are turned into
    spaces and surrounding whitespace is stripped.

    Parameters:
        hstr: the (possibly HTML) header value; may be None or empty.

    Returns:
        The cleaned-up text, or "" when hstr is None/empty or when lxml
        rejects the input with a ParserError.
    """
    # Truthiness instead of len(): a missing value (None) must not raise.
    if not hstr:
        return ""

    # Only the parse itself can raise ParserError, so keep the try minimal.
    try:
        content = lxml.html.fromstring(hstr).text_content()
    except lxml.etree.ParserError:
        return ""

    return content.replace("\xa0", "").replace("\n", " ").strip()
|
|
|
30 |
|
|
|
31 |
|
|
|
class LocalSendmail(object):
    """Deliver a message by piping it to a local sendmail-style command.

    The command is described by a template containing two "%s"
    placeholders: the first receives the envelope sender, the second the
    envelope recipient.
    """

    # Default command template (sender, recipient).
    cmd = "/usr/sbin/sendmail -f \"%s\" \"%s\""

    # Markers substituted for the placeholders while the template is being
    # tokenised; control characters keep them from colliding with real
    # template text.
    _FROM_MARK = "\x01zs-from\x01"
    _TO_MARK = "\x01zs-to\x01"

    def __init__(self, cmd=None):
        """Use the command template cmd, or the class default when None."""
        if cmd is not None:
            self.cmd = cmd

    def sendmail(self, faddr, taddr, msg):
        """Pipe msg (a str) to the configured command.

        Parameters:
            faddr: envelope sender address.
            taddr: envelope recipient address.
            msg: the complete message as a string; sent UTF-8 encoded on
                the command's standard input.

        Raises:
            ValueError: if the command template is empty.
            OSError: if the command cannot be executed.

        The template is split into arguments with shell-like quoting rules
        *before* the addresses are inserted, and the command is run without
        a shell.  Addresses (the sender may come straight from feed data)
        therefore always end up as a single argument and can never be
        interpreted as shell syntax.
        """
        import shlex

        template = self.cmd % (self._FROM_MARK, self._TO_MARK)
        argv = [
            token.replace(self._FROM_MARK, faddr).replace(self._TO_MARK, taddr)
            for token in shlex.split(template)
        ]
        if not argv:
            raise ValueError("empty sendmail command template")

        p = subprocess.Popen(argv, stdin=subprocess.PIPE)
        p.communicate(input=msg.encode("utf8"))
|
|
|
43 |
|
|
|
def _enabled(value):
    """Return True when a configuration flag is switched on.

    Both the string "True" (the form used by the defaults of send()) and
    the boolean True count as on; every other value counts as off.
    """
    return value is True or value == "True"


def _article_body(article):
    """Return the article's "text" entry converted from HTML to plain text."""
    if "text" not in article:
        return ""

    h2t = html2text.HTML2Text()
    h2t.body_width = 0
    h2t.unicode_snob = 1
    h2t.inline_links = 0
    h2t.links_each_paragraph = 0

    try:
        return "%s\n" % (h2t.handle(article["text"]))
    except Exception:
        # Conversion is best effort: fall back to the unconverted text.
        return article["text"]


def _article_url(feed, article):
    """Return the value for the X-RSS-URL header of an article with a link."""
    link = article["link"]
    if "://" in link:
        return link

    # Links without a scheme are resolved against the feed's own URI.
    aurl = urllib.parse.urljoin(feed["feeduri"], link)
    if "gopher://" in aurl:
        urls = urllib.parse.urlparse(aurl, allow_fragments=False)
        if urls.path.startswith("/0"):
            # NOTE(review): rewrites the leading "/0" of the path to "/1" --
            # presumably the gopher item-type selector; confirm.
            aurl = "%s://%s%s" % (urls.scheme, urls.netloc,
                                  urls.path.replace("/0", "/1", 1))
            if urls.query:
                aurl = "%s?%s" % (aurl, urls.query)
    return aurl


def _smtp_deliver(faddr, to, data, smtphost, smtpport, use_ssl,
                  use_starttls, user, password):
    """Send one message over SMTP and always close the session."""
    factory = smtplib.SMTP_SSL if use_ssl else smtplib.SMTP
    # Hand host and port to the constructor so exactly one connection is
    # made, directly to the requested port (0 selects the class default).
    port = smtpport if smtpport is not None else 0

    # The context manager issues QUIT and closes the socket even when
    # login or delivery raises.
    with factory(smtphost, port) as s:
        s.ehlo()
        if not use_ssl and use_starttls:
            s.starttls()
            s.ehlo()

        if user is not None and password is not None:
            s.login(user, password)

        s.sendmail(faddr, to, data)


def send(feed, to, smtphost="localhost", smtpport=None, ssl="False",
         starttls="True", user=None, password=None, smtpcmd=None,
         smtpuselocal=False):
    """Mail every article of feed to the address to, one message each.

    Parameters:
        feed: mapping with an "articles" sequence and "feeduri"; the
            optional keys "email", "title" and "link" are used when present.
        to: recipient address (also used as envelope recipient).
        smtphost, smtpport: SMTP server; smtpport None selects the default
            port.
        ssl: "True"/True to connect with SMTP over SSL.
        starttls: "True"/True to issue STARTTLS; only applies when ssl is
            off.
        user, password: SMTP credentials; login happens only when both are
            given.
        smtpcmd: command template handed to LocalSendmail.
        smtpuselocal: "True"/True to deliver through LocalSendmail instead
            of SMTP.

    Each article mapping may provide "text" (HTML body), "title", "link",
    "file", "author", "updated" (object with timetuple()), "id", "uuid"
    and "tags".  Articles whose text exceeds 10 MiB are skipped.

    Exceptions raised by smtplib or by the local command propagate to the
    caller.
    """
    use_local = _enabled(smtpuselocal)
    use_ssl = _enabled(ssl)
    use_starttls = _enabled(starttls)

    for article in feed["articles"]:
        text = _article_body(article)

        # Larger than 10 MiB, something is wrong.
        if len(text) > 10 * 1024 * 1024:
            continue

        if "title" in article:
            subject = Header(normalizeheader(article["title"]), "utf-8")
        else:
            subject = Header(normalizeheader(text[:20]), "utf-8")

        # Append metadata.
        if "link" in article:
            text = "%sURL: %s\n" % (text, article["link"])
        if "file" in article:
            text = "%sEnclosure: %s\n" % (text, article["file"])

        msg = MIMEText(text, "plain", "utf-8")

        if "email" in feed:
            faddr = feed["email"]
        else:
            faddr = "none@none.no"

        # Display name of the sender.  A feed without a title must not
        # leave the name unbound: fall back to the author, then to no name.
        if "title" in feed:
            if "author" in article:
                fname = "%s: %s" % (feed["title"], article["author"])
            else:
                fname = feed["title"]
        elif "author" in article:
            fname = article["author"]
        else:
            fname = ""

        msg["From"] = formataddr((fname, faddr))
        msg["To"] = formataddr(parseaddr(to))
        if "updated" in article:
            msg["Date"] = formatdate(time.mktime(
                article["updated"].timetuple()))
        else:
            msg["Date"] = formatdate()
        msg["Subject"] = subject

        if "link" in article:
            msg["X-RSS-URL"] = _article_url(feed, article)
        if "link" in feed:
            msg["X-RSS-Feed"] = feed["link"]
        else:
            msg["X-RSS-Feed"] = feed["feeduri"]
        if "id" in article:
            msg["X-RSS-ID"] = article["id"]
        if "uuid" in article:
            msg["X-RSS-UUID"] = article["uuid"]
        if "tags" in article:
            msg["X-RSS-TAGS"] = Header(",".join(article["tags"]), "utf-8")
        msg["User-Agent"] = "Zeitungsschau"

        if use_local:
            s = LocalSendmail(smtpcmd)
            s.sendmail(faddr, to, msg.as_string())
        else:
            _smtp_deliver(faddr, to, msg.as_string(), smtphost, smtpport,
                          use_ssl, use_starttls, user, password)
|
|
|
160 |
|