gemini.git
going-flying.com gemini git repository
summary
tree
log
refs
gemini.git/files/thoughts-to-gemini.py | 10 KB
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
'''thoughts-to-gemini.py (c) 2020-2024 Matthew J Ernisse <matt@going-flying.com>
All Rights Reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided
that the following conditions are met:
* Redistributions of source code must retain the
above copyright notice, this list of conditions
and the following disclaimer.
* Redistributions in binary form must reproduce
the above copyright notice, this list of conditions
and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import datetime
import jinja2
import json
import os
import requests
import sys
import time
import warnings
import zoneinfo
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from feedgen.feed import FeedGenerator
# Jinja2 template for a single thought.  It is rendered once per thought
# with ``entry`` bound to the thought dict and emits, in order: a header
# line carrying ``entry.date``, an optional In-Reply-To line, the message
# body, and one gemini link line ("=> name type") per attachment.
# NOTE(review): the decorative characters on the header and In-Reply-To
# lines look like UTF-8 box-drawing text decoded with the wrong codec;
# confirm against the upstream file before trusting the rendered output.
entry_template = '''โโโโโโโฃโ {{ entry.date }} โโโโโโโโโโโโโ
{% if entry.in_reply_to_date %}
โ In-Reply-To: {{ entry.in_reply_to_date }}
{% endif %}
{{ entry.message }}
{% if entry.attachment %}
Attachments:
{% for attachment in entry.attachment %}
=> {{ attachment.name|urlencode }} {{ attachment.type }}
{% endfor %}
{% endif %}
'''
# Jinja2 template for index.gmi.  It is rendered with ``thoughts`` bound to
# the Thoughts instance and ``build_time`` bound to a formatted timestamp:
# an ASCII-art banner, then every year, every month of that year and every
# pre-rendered entry of that month, then a footer with the build time.
# NOTE(review): the banner opens a preformatted block with ``` but no
# closing fence is visible here, and the emoji/symbol characters look
# mis-decoded; confirm both against the upstream file.
index_template = '''```
_______ __ __ __
|_ _|| |--..-----..--.--..-----.| |--.| |_ .-----.
| | | || _ || | || _ || || _||__ --|
|___| |__|__||_____||_____||___ ||__|__||____||_____|
|_____|
๐ญ Random Thoughts
{% for year in thoughts.years %}
{{ year }}
{% for month in thoughts.byYear(year) %}
{{ month }}
{% for entry in thoughts.forMonth(year, month) %}
{{ entry }}
{% endfor %}
{% endfor %}
{% endfor %}
ฮฉ
๐ฐ๏ธ Generated at {{ build_time }}
๐ ยฉ MMXX-MMXXIV matt@going-flying.com
'''
# Base URL of the gemini rendering; used as the Atom feed id and as the
# base of the feed's "alternate" and "self" links.
URLBASE = 'gemini://going-flying.com/thoughts/'

# Base URL of the HTML rendering; each feed entry links to
# <WEBBASE><id>.html.
WEBBASE = 'https://www.going-flying.com/thoughts/'

# Suppress BeautifulSoup's MarkupResemblesLocatorWarning for the whole run.
warnings.filterwarnings('ignore', category=MarkupResemblesLocatorWarning)
class DeHTMLizer(object):
    '''Converter for the lightweight Thoughts HTML into gemini's
    markup language.

    The converted text is kept in ``self.gemini`` and every link target
    encountered is appended, in document order, to ``self.links``.
    ``str(instance)`` returns the text followed by one gemini link line
    per collected link.
    '''

    def __init__(self, s):
        ''' Given a HTML string, convert it into text/gemini '''
        self.gemini = ''
        self.links = []

        if not s:
            return

        body = BeautifulSoup(s, 'lxml').find('body')
        if body is None:
            return

        self.gemini = ''.join(self.parseElement(el) for el in body.contents)

    def __str__(self):
        if not self.gemini:
            return '~ NO MESSAGE ~'

        if not self.links:
            return self.gemini

        # Footnote-style trailer: link N matches the superscript N that
        # parseTag placed after the link text.
        trailer = ''.join(
            f'=> {link} [{n}] {link}\n'
            for n, link in enumerate(self.links, 1)
        )
        return f'{self.gemini}\n\n{trailer}'

    def parseElement(self, el):
        ''' Parse an Element from BeautifulSoup, this will recursively
        call parseTag on nested tags as needed.  It also handles
        the difference between a Tag and a NavigableString.
        '''
        if el.name is not None:
            return self.parseTag(el)

        if el.string is not None:
            return el.string

        return ''

    def _parseChildren(self, tag):
        ''' Convert every child of tag and concatenate the results. '''
        return ''.join(self.parseElement(el) for el in tag.contents)

    def parseTag(self, tag):
        ''' Convert HTML tags into various plain-text formatted
        elements.  Handle nested blockquote and p tags and create
        a list of links in self.links that can be used in any way
        the caller desires.

        Strips style and script elements completely.  Converts
        blockquote to >, pre to ```, em and strong to *text*, del to
        text^W and br to a newline.  Any other tag yields its single
        string child, or nothing when it has none.
        '''
        nestable = ['blockquote', 'div', 'p']
        noprint = ['style', 'script']
        name = tag.name

        if name == 'a':
            href = tag.get('href')
            if not href:
                # An anchor without a target cannot become a gemini
                # link; keep its text instead of raising KeyError.
                return self._parseChildren(tag)

            self.links.append(href)
            sNum = SuperNum(len(self.links))
            # Fall back to the URL when the anchor has no single string.
            label = tag.string or href
            return f'«{label}»{sNum!s}'

        if name == 'br':
            return '\n'

        if name == 'del':
            # get_text() rather than .string, which is None (and would
            # print as "None") as soon as the tag has nested markup.
            return f'{tag.get_text()}^W'

        if name == 'pre':
            return f'```\n{tag.get_text()}\n```'

        if name in nestable:
            buf = self._parseChildren(tag)
            if name == 'blockquote':
                # NOTE(review): only the first line receives the quote
                # marker; a multi-line quote keeps the remaining lines
                # unprefixed.
                return f'> {buf}'
            return buf

        # These are also nestable, sort of?.
        if name in ['em', 'strong']:
            return f'*{self._parseChildren(tag)}*'

        if name in noprint:
            return ''

        if tag.string is None:
            return ''

        return tag.string
class Thoughts(object):
    ''' Render Thoughts from the API and save state to disk.

    Constructing an instance performs the whole run against
    ``thoughtdir``: load the cached ``thoughts.json``, fetch anything
    newer from the API, rewrite the cache, download attachments that
    are not on disk yet and write ``index.gmi`` and ``atom.xml``.
    '''
    attachurl = 'https://thoughtsassets.blob.core.windows.net/assets'
    # Seconds to wait on the attachment server, so an unattended run
    # cannot hang forever on a stalled connection.
    timeout = 60

    def __init__(self, thoughtdir):
        ''' Synchronise and render all thoughts into thoughtdir.

        Raises ValueError when thoughtdir does not exist.
        '''
        self.api = ThoughtApi()
        self.thoughtdir = thoughtdir
        self.thoughts = []
        self._years = {}

        if not os.path.exists(thoughtdir):
            raise ValueError('Dir does not exist')

        t_json = os.path.join(thoughtdir, 'thoughts.json')
        self._syncThoughts(t_json)
        self._resolveReplies()

        # Persist before rendering: _processThought replaces each
        # message with a DeHTMLizer, which json cannot serialise.
        with open(t_json, 'w', encoding='utf-8') as fd:
            json.dump(self.thoughts, fd, ensure_ascii=False)

        self.tmpl = jinja2.Template(
            entry_template,
            trim_blocks=True,
            lstrip_blocks=True
        )

        for thought in self.thoughts:
            self._processThought(thought)
            self._downloadAttachments(thoughtdir, thought)

        self._writeIndex()
        self._writeFeed()

    def _syncThoughts(self, t_json):
        ''' Load the cache at t_json, add newer thoughts from the API
        and leave self.thoughts sorted newest first.
        '''
        if os.path.exists(t_json):
            with open(t_json, 'r', encoding='utf-8') as fd:
                self.thoughts = json.load(fd)

        if self.thoughts:
            local_newest = max(t['id'] for t in self.thoughts)
            if self.api.newest > local_newest:
                self.thoughts.extend(ThoughtApi(local_newest).thoughts)
        else:
            self.thoughts = list(ThoughtApi().thoughts)

        self.thoughts.sort(key=lambda k: k['id'], reverse=True)

    def _resolveReplies(self):
        ''' Store the parent's date on every reply as in_reply_to_date.

        Save the date of the In-Reply-To thought instead of the ID
        since we would much rather print the datestr out.  A reply
        whose parent is not in self.thoughts is left untouched.
        '''
        dates = {}
        for thought in self.thoughts:
            # First occurrence wins, should an id ever appear twice.
            dates.setdefault(thought['id'], thought['date'])

        for thought in self.thoughts:
            parent_id = thought.get('in-reply-to')
            if parent_id and parent_id in dates:
                thought['in_reply_to_date'] = dates[parent_id]

    def _writeIndex(self):
        ''' Render index_template to index.gmi in self.thoughtdir. '''
        now = datetime.datetime.now(zoneinfo.ZoneInfo('US/Eastern'))
        tmpl = jinja2.Template(
            index_template,
            trim_blocks=True,
            lstrip_blocks=True
        )

        outFile = os.path.join(self.thoughtdir, 'index.gmi')
        with open(outFile, 'w', encoding='utf-8') as fd:
            fd.write(tmpl.render({
                'build_time': now.strftime('%c %z'),
                'thoughts': self
            }))

    def _writeFeed(self):
        ''' Generate the atom feed as atom.xml in self.thoughtdir. '''
        feed = FeedGenerator()
        feed.id(URLBASE)
        feed.title('Thoughts from mernisse')
        feed.author({
            'name': 'mernisse',
            'email': 'matt@going-flying.com'
        })
        feed.link(href=URLBASE, rel='alternate')
        feed.link(href=URLBASE + 'atom.xml', rel='self')

        for entry in self.thoughts:
            # The thought id is used as its UTC unix timestamp.
            pubdate = datetime.datetime.fromtimestamp(
                entry['id'],
                tz=datetime.timezone.utc
            )

            message = ''
            reply_date = entry.get('in_reply_to_date')
            if entry.get('in-reply-to') and reply_date:
                message += f'In-Reply-To: {reply_date}\n'
            message += str(entry['message'])

            e = feed.add_entry()
            e.content(content=message, type='text')
            e.id(str(entry['id']))
            e.title('A brief thought from mernisse')
            e.link(
                href=f'{WEBBASE}{entry["id"]}.html',
                rel='alternate',
                type='text/html'
            )
            e.updated(pubdate)

        outFile = os.path.join(self.thoughtdir, 'atom.xml')
        feed.atom_file(outFile)

    def _downloadAttachments(self, localdir, thought):
        ''' Fetch every attachment of thought that is not yet present
        in localdir.  Raises on any HTTP error status.
        '''
        if 'attachment' not in thought:
            return

        for a in thought['attachment']:
            outFile = os.path.join(localdir, a['name'])
            if os.path.exists(outFile):
                continue

            resp = requests.get(
                self.attachurl + '/' + a['name'],
                timeout=self.timeout
            )
            resp.raise_for_status()
            with open(outFile, 'wb') as fd:
                fd.write(resp.content)

    def _processThought(self, thought):
        ''' Convert thought's message to gemini text in place and file
        the rendered entry under its year and month name.
        '''
        dt = datetime.datetime.fromtimestamp(
            thought['id'],
            tz=datetime.timezone.utc
        )
        bucket = self._years.setdefault(dt.year, {}).setdefault(
            dt.strftime('%B'),
            []
        )
        thought['message'] = DeHTMLizer(thought['message'])
        bucket.append(self.tmpl.render(entry=thought))

    @property
    def years(self):
        ''' Yield every year that has entries, in insertion order. '''
        yield from self._years

    def byYear(self, year):
        ''' Return the month names that have entries in year. '''
        return self._years[year].keys()

    def forMonth(self, year, month):
        ''' Return the rendered entries for month of year. '''
        return self._years[year][month]
class SuperNum(object):
    ''' Return given number as unicode superscript.

    ``str(SuperNum(12))`` is the two characters SUPERSCRIPT ONE and
    SUPERSCRIPT TWO.  The value is normalised through ``int()``, so a
    float is truncated and a negative number is rendered with a
    leading SUPERSCRIPT MINUS.
    '''
    # Superscript forms of the digits 0-9, indexed by digit.  Written as
    # escapes so the table survives a mis-decoded copy of this file.
    _u = [
        '\u2070', '\u00b9', '\u00b2', '\u00b3', '\u2074',
        '\u2075', '\u2076', '\u2077', '\u2078', '\u2079',
    ]
    _minus = '\u207b'

    def __init__(self, val):
        ''' Store val as a decimal string.

        Raises ValueError when val cannot be converted by int().
        '''
        try:
            number = int(val)
        except (TypeError, ValueError) as err:
            # int(None) raises TypeError; report it the same way.
            raise ValueError('Value must be a base 10 integer') from err

        self.val = str(number)

    def __str__(self):
        return ''.join(
            self._minus if ch == '-' else self._u[int(ch)]
            for ch in self.val
        )
class ThoughtApi(object):
    ''' Provide an interface to my Thoughts.

    ``since`` is the id after which thoughts are requested; it moves
    forward as the ``thoughts`` generator is consumed.
    '''
    apiurl = 'https://vociferate.azurewebsites.net/api/thoughts'
    # Number of thoughts requested per page; a shorter page marks the
    # end of the listing.
    pagesize = 25
    # Seconds to wait on the API, so an unattended run cannot hang
    # forever on a stalled connection.
    timeout = 30

    def __init__(self, since=0):
        self.since = since

    @property
    def newest(self):
        ''' Return the ID of the newest thought.

        Raises IndexError when the API returns no thoughts.
        '''
        _t = self._get(1, before=int(time.time()))[0]
        return _t['id']

    @property
    def oldest(self):
        ''' Return the ID of the first thought returned for since=0.

        Raises IndexError when the API returns no thoughts.
        '''
        _t = self._get(1, since=0)[0]
        return _t['id']

    @property
    def thoughts(self):
        ''' Fetch the thoughts from the API and emit them.

        Pages are requested starting at self.since until a page comes
        back shorter than pagesize.  self.since is updated to the id
        of each thought as it is yielded.
        '''
        while True:
            page = self._getRange()
            for thought in page:
                self.since = thought['id']
                yield thought

            if len(page) < self.pagesize:
                return

    def _get(self, count=None, before=None, since=None):
        ''' Request up to count thoughts and return them sorted by
        ascending id.  count defaults to pagesize; before and since
        are only sent when given.  Raises on any HTTP error status.
        '''
        headers = {'User-Agent': 'thought-to-gemini/1.0'}
        params = {'count': self.pagesize if count is None else count}
        if before is not None:
            params['before'] = before

        if since is not None:
            params['since'] = since

        resp = requests.get(
            self.apiurl,
            headers=headers,
            params=params,
            timeout=self.timeout
        )
        resp.raise_for_status()

        thoughts = resp.json()
        thoughts.sort(key=lambda k: k['id'])
        return thoughts

    def _getRange(self):
        ''' Return one page of thoughts from self.since. '''
        return self._get(count=self.pagesize, since=self.since)
if __name__ == '__main__':
    # Command-line entry point: exactly one argument, the directory that
    # receives index.gmi, atom.xml, thoughts.json and the attachments.
    args = sys.argv[1:]
    if len(args) != 1:
        usage = (
            f'Usage: {os.path.basename(sys.argv[0])} path',
            '',
            'This will write all Thoughts to index.gmi at the given',
            'path and download all attachments there as well.',
        )
        print('\n'.join(usage))
        sys.exit(1)

    localdir = args[0]
    if not os.path.exists(localdir):
        print(f'{localdir} does not exist or is not readable.')
        sys.exit(1)

    # Constructing the object performs the whole run.
    Thoughts(localdir)
Response: 20 (Success), text/gemini
| Original URL | gemini://going-flying.com/git/cgi/gemini.git/tree/master/... |
|---|---|
| Status Code | 20 (Success) |
| Content-Type | text/gemini; charset=utf-8 |