chromium: get-commit-message.py: Improve the parsing

The current stable release announcement [0] uses more HTML tags, which
broke the detection of "fixes" and "zero_days". Proper HTML parsing
could be done using html.parser [1], but for our purposes the naive regex
trick works well enough.

[0]: https://chromereleases.googleblog.com/2021/07/stable-channel-update-for-desktop.html
[1]: https://docs.python.org/3/library/html.parser.html
This commit is contained in:
Michael Weiss 2021-07-16 12:14:45 +02:00
parent d38d4e060b
commit 3e93811d93
No known key found for this signature in database
GPG key ID: 5BE487C4D4771D83

View file

@@ -19,14 +19,14 @@ for entry in feed.entries:
continue continue
url = requests.get(entry.link).url.split('?')[0] url = requests.get(entry.link).url.split('?')[0]
content = entry.content[0].value content = entry.content[0].value
content = html_tags.sub('', content) # Remove any HTML tags
if re.search(r'Linux', content) is None: if re.search(r'Linux', content) is None:
continue continue
#print(url) # For debugging purposes #print(url) # For debugging purposes
version = re.search(r'\d+(\.\d+){3}', content).group(0) version = re.search(r'\d+(\.\d+){3}', content).group(0)
print('chromium: TODO -> ' + version) print('chromium: TODO -> ' + version)
print('\n' + url) print('\n' + url)
if fixes := re.search(r'This update includes .+ security fixes\.', content): if fixes := re.search(r'This update includes .+ security fixes\.', content).group(0):
fixes = html_tags.sub('', fixes.group(0))
zero_days = re.search(r'Google is aware( of reports)? that .+ in the wild\.', content) zero_days = re.search(r'Google is aware( of reports)? that .+ in the wild\.', content)
if zero_days: if zero_days:
fixes += " " + zero_days.group(0) fixes += " " + zero_days.group(0)