Fixing daily URL breakage (issue #1).

As builds can come from different tinderboxes, this commit adds an additional request and parses the returned HTML with XPath, so that the final URL is less dependent on hardcoded strings.
This commit is contained in:
emiliano.vavassori 2023-12-01 21:20:15 +01:00
parent 0dc3c97758
commit 62248d862c
1 changed files with 24 additions and 7 deletions

View File

@ -2,15 +2,17 @@
# encoding: utf-8
import urllib.request
import datetime
from lxml import etree
from packaging.version import parse as parse_version
import datetime
class Definitions(object):
class Definitions():
"""Definitions for the module."""
DOWNLOADPAGE = "https://www.libreoffice.org/download/download/"
ARCHIVE = "https://downloadarchive.documentfoundation.org/libreoffice/old/"
RELEASE = "https://download.documentfoundation.org/libreoffice/stable/"
DAILY = "https://dev-builds.libreoffice.org/daily/master/Linux-rpm_deb-x86_64@tb87-TDF/"
DAILY = "https://dev-builds.libreoffice.org/daily/master/"
PRERELEASE = "https://dev-builds.libreoffice.org/pre-releases/deb/x86_64/"
SELECTORS = {
@ -32,7 +34,8 @@ class Definitions(object):
}
}
class Base(object):
class Base():
"""Contains methods that might be useful outside class."""
# Class for static methods which might be useful even outside the build
# scripts.
@ -43,15 +46,29 @@ class Base(object):
# x86 versions that it isn't really provided.
# As such, the return value must be a dictionary
# Fixing daily selector
# As seen, the number of the tinderbox building the daily version can
# change. We try to fulfill the void by adding a step.
tinderboxpage = etree.HTML(urllib.request.urlopen(Definitions.DAILY).read())
tburl = tinderboxpage.xpath(
"//td/a[starts-with(text(), 'Linux-rpm_deb-x86') and contains(text(), 'TDF/')]/text()"
)[0]
daily_selector = f"{Definitions.DAILY}{tburl}"
# Get the anchor for today's builds
a = etree.HTML(urllib.request.urlopen(Definitions.DAILY).read()).xpath("//td/a[contains(text(), '" + date.strftime('%Y-%m-%d') + "')]/text()")
if len(a) == 0:
raw_page = etree.HTML(urllib.request.urlopen(daily_selector).read())
results = raw_page.xpath(
f"""//td/a[contains(text(), "{date.strftime('%Y-%m-%d')}")]/text()"""
)
if len(results) == 0:
# No results found, no version found, let's return a
return { 'x86': '-', 'x86_64': '-' }
# On the contrary, more than a version is found. let's order the
# list and get the latest item
return { 'x86': '-', 'x86_64': Definitions.SELECTORS['daily']['URL'] + sorted(a)[-1] }
return { 'x86': '-', 'x86_64': f"{daily_selector}{sorted(results)[-1]}" }
@staticmethod
def dailyver(date = datetime.datetime.today()):