chromium: get-commit-message.py: Improve the parsing
The current stable release announcement [0] uses more HTML tags, which broke the detection of "fixes" and "zero_days". Proper HTML parsing could be done using html.parser [1], but for our purposes the naive regex trick works well enough. [0]: https://chromereleases.googleblog.com/2021/07/stable-channel-update-for-desktop.html [1]: https://docs.python.org/3/library/html.parser.html
This commit is contained in:
parent
d38d4e060b
commit
3e93811d93
1 changed file with 2 additions and 2 deletions
|
@@ -19,14 +19,14 @@ for entry in feed.entries:
|
||||||
continue
|
continue
|
||||||
url = requests.get(entry.link).url.split('?')[0]
|
url = requests.get(entry.link).url.split('?')[0]
|
||||||
content = entry.content[0].value
|
content = entry.content[0].value
|
||||||
|
content = html_tags.sub('', content) # Remove any HTML tags
|
||||||
if re.search(r'Linux', content) is None:
|
if re.search(r'Linux', content) is None:
|
||||||
continue
|
continue
|
||||||
#print(url) # For debugging purposes
|
#print(url) # For debugging purposes
|
||||||
version = re.search(r'\d+(\.\d+){3}', content).group(0)
|
version = re.search(r'\d+(\.\d+){3}', content).group(0)
|
||||||
print('chromium: TODO -> ' + version)
|
print('chromium: TODO -> ' + version)
|
||||||
print('\n' + url)
|
print('\n' + url)
|
||||||
if fixes := re.search(r'This update includes .+ security fixes\.', content):
|
if fixes := re.search(r'This update includes .+ security fixes\.', content).group(0):
|
||||||
fixes = html_tags.sub('', fixes.group(0))
|
|
||||||
zero_days = re.search(r'Google is aware( of reports)? that .+ in the wild\.', content)
|
zero_days = re.search(r'Google is aware( of reports)? that .+ in the wild\.', content)
|
||||||
if zero_days:
|
if zero_days:
|
||||||
fixes += " " + zero_days.group(0)
|
fixes += " " + zero_days.group(0)
|
||||||
|
|
Loading…
Reference in a new issue