loaih/loaih/__init__.py

260 lines
8.2 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
"""machinery for compiling new versions of appimages."""
import datetime
import json
import re
import requests
import subprocess
import shlex
from lxml import html
# Constants
DOWNLOADPAGE = "https://www.libreoffice.org/download/download/"
ARCHIVE = "https://downloadarchive.documentfoundation.org/libreoffice/old/"
RELEASE = "https://download.documentfoundation.org/libreoffice/stable/"
DAILY = "https://dev-builds.libreoffice.org/daily/master/"
PRERELEASE = "https://dev-builds.libreoffice.org/pre-releases/deb/x86_64/"
SELECTORS = {
'still': {
'URL': DOWNLOADPAGE,
'xpath': '(//span[@class="dl_version_number"])[last()]/text()'
},
'fresh': {
'URL': DOWNLOADPAGE,
'xpath': '(//span[@class="dl_version_number"])[1]/text()'
},
'prerelease': {
'URL': DOWNLOADPAGE,
'xpath': '//p[@class="lead_libre"][last()]/following-sibling::ul[last()]/li/a/text()'
},
'daily': {
'URL': DAILY,
'xpath': '//td/a'
}
}
# Generic functions
def match_xpath(url: str, xpath: str):
"""Uses a couple of extensions to get results over webpage."""
resource = requests.get(url, timeout=10)
parsed = html.fromstring(resource.content)
return parsed.xpath(xpath)
# Classes
class Version():
"""Represent the skeleton of each queried version."""
def __init__(self):
self.query = ''
self.branch = ''
self.version = ''
self.urls = {
'x86': '-',
'x86_64': '-'
}
def appname(self):
"""Determines the app name based on the query branch determined."""
datematch = re.match(r'[0-9]{8}', self.query)
retval = 'LibreOffice'
if self.query in {'prerelease', 'daily', 'current', 'yesterday'} or datematch:
retval = 'LibreOfficeDev'
return retval
def cleanup_downloads(self, path, verbose=False) -> None:
"""Cleanups the downloads folder to assure new versions are built."""
search_name = self.appname() + '_' + self.version
cmd = f"find {path} -iname {search_name}\\*.tar.gz -delete"
if verbose:
subprocess.run(shlex.split(cmd), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
else:
subprocess.run(shlex.split(cmd))
def to_dict(self):
"""Returns a dictionary of versions."""
return {
'query': self.query,
'version': self.version,
'basedirurl': self.urls
}
def to_json(self):
"""Returns a json representation of the version."""
return json.dumps(self.to_dict())
def __str__(self):
return f"""query: {self.query}
version: {self.version}
x86: {self.urls['x86']}
x86_64: {self.urls['x86_64']}"""
class QueryError(Exception):
"""Standard exception for errors regarding queries."""
class Solver():
"""Generic solver to call others."""
def __init__(self, text: str, default_to_current = False):
self.text = text
self.branch = text
self.version = None
self.default_to_current = default_to_current
self.baseurl = ARCHIVE
def solve(self):
"""Splits the query text possibilities, calling all the rest of the solvers."""
solver = self
if self.text in { 'current', 'yesterday', 'daily' }:
solver = DailySolver(self.text, self.default_to_current)
elif self.text in { 'still', 'fresh', 'prerelease' }:
solver = NamedSolver(self.text)
elif '.' in self.text:
solver = NumberedSolver(self.text)
else:
try:
int(self.text)
solver = DailySolver(self.text, self.default_to_current)
except ValueError:
raise QueryError("The queried version does not exist.")
self.version = solver.solve()
self.baseurl = solver.baseurl
return self.version
def to_version(self):
retval = Version()
retval.query = self.text
retval.branch = self.branch
retval.version = self.version
if retval.branch != 'daily':
retval.urls['x86_64'] = self.baseurl + 'x86_64/'
try:
x86ver = match_xpath(self.baseurl + 'x86/', '//td/a/text()')
except Exception:
return retval
if len(x86ver) > 1:
retval.urls['x86'] = self.baseurl + 'x86/'
else:
retval.urls['x86_64'] = self.baseurl
return retval
@staticmethod
def parse(text: str, default_to_current = False):
"""Calling the same as solver class."""
retval = Solver(text, default_to_current)
retval.solve()
return retval.to_version()
class DailySolver(Solver):
"""Specific solver to daily queries."""
def __init__(self, text: str, default_to_current = False):
super().__init__(text, default_to_current)
self.branch = 'daily'
self.baseurl = DAILY
def solve(self):
"""Get daily urls based on query."""
x = "//td/a[starts-with(text(),'Linux-rpm_deb-x86') and contains(text(),'TDF/')]/text()"
tinderbox_segment = match_xpath(self.baseurl, x)[-1]
self.baseurl = self.baseurl + tinderbox_segment
# Reiterate now to search for the dated version
xpath_query = "//td/a/text()"
daily_set = match_xpath(self.baseurl, xpath_query)
matching = ''
today = datetime.datetime.today()
try:
int(self.text)
matching = datetime.datetime.strptime(self.text, "%Y%m%d").strftime('%Y-%m-%d')
except ValueError:
# All textual version
if self.text in { 'current', 'daily' }:
matching = 'current'
elif self.text == 'yesterday':
matching = (today + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
results = sorted([ x for x in daily_set if matching in x ])
if len(results) == 0:
# No daily versions found.
if self.default_to_current:
solver = DailySolver('current')
self.version = solver.version
self.baseurl = solver.baseurl
else:
self.baseurl = self.baseurl + results[-1]
# baseurl for x86 is not available for sure on daily builds.
xpath_string = "//td/a[contains(text(), '_deb.tar.gz')]/text()"
links = match_xpath(self.baseurl, xpath_string)
if len(links) > 0:
link = str(links[-1])
self.version = link.rsplit('/', maxsplit=1)[-1].split('_')[1]
return self.version
class NamedSolver(Solver):
"""Solves the query knowing that the input is a named query."""
def __init__(self, text: str):
super().__init__(text)
self.branch = text
self.baseurl = SELECTORS[self.text]['URL']
self.generalver = ''
def solve(self):
"""Get versions from query."""
xpath_query = SELECTORS[self.text]['xpath']
results = sorted(match_xpath(self.baseurl, xpath_query))
if len(results) > 0:
self.generalver = str(results[-1])
result: str = self.generalver
xpath_string = f"//td/a[starts-with(text(),'{result}')]/text()"
archived_versions = sorted(match_xpath(ARCHIVE, xpath_string))
if len(archived_versions) == 0:
return self.version
# Return just the last versions
fullversion: str = str(archived_versions[-1])
self.baseurl = ARCHIVE + fullversion + 'deb/'
self.version = fullversion.rstrip('/')
return self.version
class NumberedSolver(Solver):
"""Specific solver for numbered versions."""
def __init__(self, text: str):
super().__init__(text)
self.branch = '.'.join(text.split('.')[0-2])
def solve(self):
xpath_string = f"//td/a[starts-with(text(),'{self.text}')]/text()"
versions = sorted(match_xpath(self.baseurl, xpath_string))
if len(versions) == 0:
# It is possible that in the ARCHIVE there's no such version (might be a prerelease)
return self.version
version = str(versions[-1])
self.baseurl = self.baseurl + version + 'deb/'
self.version = version.rstrip('/')
return self.version