|
|
kvssachsen2atom - brcon2023-hackathons - Bitreichcon 2023 Hackathon Repository |
|
|
 |
git clone git://bitreich.org/brcon2023-hackathons git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/brcon2023-hackathons (git://bitreich.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
Tags |
|
|
|
--- |
|
|
|
kvssachsen2atom (3188B) |
|
|
|
--- |
|
|
|
1 #!/usr/bin/env python |
|
|
|
2 # coding=utf-8 |
|
|
|
3 # |
|
|
|
4 # Copy me if you can. |
|
|
|
5 # by 20h |
|
|
|
6 # |
|
|
|
7 |
|
|
|
8 import os |
|
|
|
9 import sys |
|
|
|
10 import getopt |
|
|
|
11 |
|
|
|
12 from selenium import webdriver |
|
|
|
13 from selenium.webdriver.chrome.options import Options as chromeoptions |
|
|
|
14 from selenium.webdriver.support.ui import WebDriverWait |
|
|
|
15 from selenium.webdriver.support import expected_conditions as EC |
|
|
|
16 from selenium.webdriver.common.by import By |
|
|
|
17 |
|
|
|
18 from datetime import datetime |
|
|
|
19 import pytz |
|
|
|
20 |
|
|
|
def usage(app):
    """Write a one-line usage summary to stderr and exit with status 1.

    app -- path the program was invoked as; only its final path
           component is shown in the message.
    """
    progname = os.path.basename(app)
    message = "usage: %s [-h] URI\n" % (progname)
    sys.stderr.write(message)
    sys.exit(1)
|
|
|
25 |
|
|
|
def _cdata(text):
    """Return *text* wrapped in a CDATA section.

    A literal "]]>" inside the text would end the section early, so it is
    split across two adjacent CDATA sections.
    """
    return "<![CDATA[%s]]>" % (str(text).replace("]]>", "]]]]><![CDATA[>"))


def _normalize_updated(text, utcnow):
    """Parse a "DD.MM.YYYY" date into an aware UTC datetime at 12:00:00.

    Returns None when *text* does not match that format. A year later
    than the year of *utcnow* is replaced by the year of *utcnow*.
    """
    try:
        parsed = datetime.strptime(text, "%d.%m.%Y")
    except ValueError:
        return None

    parsed = parsed.replace(hour=12, minute=0, second=0, tzinfo=pytz.utc)
    # NOTE(review): presumably guards against mistyped years on the
    # site -- confirm the intent of clamping future years.
    if parsed.year > utcnow.year:
        try:
            parsed = parsed.replace(year=utcnow.year)
        except ValueError:
            # 29 February does not exist in the target year.
            parsed = parsed.replace(year=utcnow.year, day=28)
    return parsed


def main(args):
    """Render the news list found at the given URI as an Atom feed on stdout.

    args -- full argument vector (args[0] is the program name); exactly
            one positional URI is used, "-h" prints the usage and exits.

    The page is loaded in headless Chrome. Articles without a link, a
    <time> element or a parsable "DD.MM.YYYY" date are skipped. The feed
    is only printed once scraping has finished, so a scraping failure
    leaves no partial document on stdout.

    Returns 0 on success and 1 when the page contains no news list.
    """
    # Function-scope import: the escaping helpers are only needed here.
    from xml.sax.saxutils import escape, quoteattr

    try:
        opts, largs = getopt.getopt(args[1:], "h")
    except getopt.GetoptError as err:
        # stdout carries the feed, so diagnostics belong on stderr.
        sys.stderr.write("%s\n" % (err))
        usage(args[0])

    for o, _ in opts:
        if o == "-h":
            usage(args[0])
        else:
            raise AssertionError("unhandled option")

    if len(largs) < 1:
        usage(args[0])

    link = largs[0]

    options = chromeoptions()
    chromearguments = [
        "headless",
        "no-sandbox",
        "disable-extensions",
        "disable-dev-shm-usage",
        "start-maximized",
        "window-size=1900,1080",
        "disable-gpu",
    ]
    for carg in chromearguments:
        options.add_argument(carg)

    # Scheme and host of the feed URI; [:3] also works when the URI has
    # no path component at all.
    baselink = "/".join(link.split("/", 3)[:3])
    pauthor = "sachsen@kvsachsen.de"
    utcnow = datetime.now(pytz.utc)
    entries = []

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(link)

        # Wait until the page has rendered its content.
        WebDriverWait(driver=driver, timeout=60).until(
            EC.presence_of_element_located(
                (By.XPATH, "//div[@data-last-letter]")
            )
        )

        newslists = driver.find_elements(
            By.XPATH, "//div[@data-filter-target=\"list\"]")
        if not newslists:
            sys.stderr.write("%s: no news list found\n" % (link))
            return 1

        metas = driver.find_elements(
            By.XPATH, "//meta[@property=\"og:title\"]")
        title = metas[0].get_attribute("content") if metas else None
        if not title:
            title = driver.title

        # The articles are emitted in reverse page order.
        for article in newslists[0].find_elements(By.XPATH, "./div")[::-1]:
            anchors = article.find_elements(By.XPATH, "./a")
            times = article.find_elements(By.XPATH, ".//time")
            if not anchors or not times:
                continue

            plink = anchors[0].get_attribute("href")
            if not plink:
                continue
            if not plink.startswith("http"):
                plink = "%s/%s" % (baselink, plink.lstrip("/"))

            pupdated = _normalize_updated(times[0].text, utcnow)
            if pupdated is None:
                continue

            ptitle = anchors[0].get_attribute("data-title") or ""
            entries.append((plink, ptitle, pupdated, article.text))
    finally:
        # Always shut the browser down, even when scraping fails.
        driver.quit()

    description = title

    print("""<?xml version="1.0" encoding="utf-8"?>""")
    print("""<feed xmlns="http://www.w3.org/2005/Atom">""")
    print("\t<title>%s</title>" % (_cdata(title)))
    print("\t<subtitle>%s</subtitle>" % (_cdata(description)))
    print("\t<id>%s</id>" % (escape(link)))
    print("\t<link href=%s rel=\"self\" />" % (quoteattr(link)))
    print("\t<link href=%s />" % (quoteattr(link)))
    print("\t<updated>%s</updated>" % (utcnow.isoformat()))

    for plink, ptitle, pupdated, pcontent in entries:
        print("\t<entry>")
        print("\t\t<id>%s</id>" % (escape(plink)))
        print("\t\t<title>%s</title>" % (_cdata(ptitle)))
        print("\t\t<link href=%s />" % (quoteattr(plink)))
        print("\t\t<author><name>%s</name></author>" % (escape(pauthor)))
        print("\t\t<updated>%s</updated>" % (pupdated.isoformat()))
        print("\t\t<content>%s</content>" % (_cdata(pcontent)))
        print("\t</entry>")

    print("</feed>")

    return 0
|
|
|
118 |
|
|
|
# Script entry point: hand the full argument vector to main() and use its
# return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
|
|
|
121 |
|