summaryrefslogtreecommitdiff
blob: 2b515b426898541af34c26247533b369fd6da6d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup


def get_opera_chromium_versions(base_url, start_version, end_version):
    """
    Map Opera versions to Chromium versions by scraping Opera changelog pages.

    One page is fetched per major version in ``start_version..end_version``
    (inclusive). Inside the page's ``<div class="content">`` every ``<h4>``
    heading of the form ``"<opera version> – <date>"`` starts a section; the
    ``<ul>`` lists that follow it (up to the next ``<h4>``) are searched for
    an "Update Chromium" entry, whose last word is taken as the Chromium
    version.

    Args:
        base_url: The base URL for Opera changelogs with a version placeholder (e.g., "https://blogs.opera.com/desktop/changelog-for-{}/").
        start_version: The starting version to extract information for (inclusive).
        end_version: The ending version to extract information for (inclusive).

    Returns:
        A dictionary mapping Opera version to Chromium version.
        If a section mentions no update, the Chromium version of the
        previously processed section is used. If no Chromium version is
        known yet (start of the run, or right after a page failed),
        "unknown" is used.
    """
    versions = {}
    chromium_version = None

    for version in range(start_version, end_version + 1):
        url = base_url.format(version)
        print(f"Processing version {version}")

        try:
            # Set a timeout to avoid hanging requests
            response = requests.get(url, timeout=5)
            response.raise_for_status()  # Raise exception for 4xx/5xx status codes

            soup = BeautifulSoup(response.content, 'html.parser')
            content = soup.find('div', class_='content')
            if content is None:
                # Page layout is not what we expect; nothing to carry forward.
                print(f"No changelog content found for version {version}")
                chromium_version = None
                continue

            # Each H4 heading starts the section for one Opera release
            for section in content.find_all('h4'):
                heading = section.text.strip()
                version_str, separator, _date_str = heading.partition(' – ')
                if not separator:
                    # Not a "<version> – <date>" heading; skip it instead of
                    # abandoning the remaining sections of the page.
                    print(f"Skipping unrecognized heading: {heading!r}")
                    continue

                updated = _find_chromium_update(section)
                if updated:
                    chromium_version = updated
                elif not chromium_version:
                    chromium_version = "unknown"

                # Record only after the section has been scanned, so the
                # entry reflects this section's own Chromium update.
                versions[version_str] = chromium_version

        except requests.exceptions.RequestException as e:
            # The HTTP status lives on the attached response, not in e.args.
            # Compare against None explicitly: an error Response is falsy.
            failed_response = getattr(e, 'response', None)
            if failed_response is not None and failed_response.status_code == 404:
                print(f"Version {version} not found (404)")
            else:
                print(f"Error fetching data for version {version}: {e}")
            chromium_version = None  # Reset chromium_version for next iteration

        except Exception as e:  # Best-effort scrape: report and move on
            print(f"Unexpected error: {e}")
            chromium_version = None  # Reset chromium_version for next iteration

    return versions


def _find_chromium_update(section):
    """Return the Chromium version named in the section after an H4, or None.

    Walks the tag siblings following ``section`` until the next ``<h4>``.
    In each ``<ul>`` the first ``<li>`` containing "Update Chromium" is used
    and its last whitespace-separated word is taken as the version; if
    several lists match, the last matching list wins.
    """
    found = None
    sibling = section.find_next_sibling(
        lambda tag: tag.name is not None)  # Skip text nodes

    while sibling and sibling.name != 'h4':
        if sibling.name == 'ul':
            for item in sibling.find_all('li'):
                item_text = item.text.strip()
                if 'Update Chromium' in item_text:
                    # Capture the value at the match itself so later,
                    # unrelated list items can never overwrite it.
                    found = item_text.split()[-1]
                    break

        sibling = sibling.find_next_sibling(
            lambda tag: tag.name is not None)  # Skip text nodes

    return found


def remediate_unknown_versions(versions):
    """
    Fill in "unknown" Chromium versions using the most recent preceding value.

    The dictionary is walked in its own iteration order. Every value that is
    not "unknown" becomes the new fallback; every "unknown" value is replaced
    by the current fallback, unless no fallback exists yet (or the fallback
    is None), in which case it is left untouched.

    Args:
        versions: A dictionary mapping Opera version to Chromium version.

    Returns:
        The same dictionary object, modified in place.
    """
    last_known = None

    for opera_release, recorded in versions.items():
        needs_fix = recorded == "unknown"

        if not needs_fix:
            # Any other value (even None) becomes the fallback going forward
            last_known = recorded
            continue

        if last_known is None:
            # Nothing to fall back on; keep the "unknown" marker
            continue

        # Only existing keys are reassigned, so iteration stays valid
        versions[opera_release] = last_known

    return versions


# Example usage: scrape the changelogs for Opera 100 through 110, back-fill
# any "unknown" entries, then print the resulting mapping.
# The "{}" in the URL is the placeholder for the Opera major version.
base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
opera_chromium_versions = remediate_unknown_versions(
    get_opera_chromium_versions(base_url, 100, 110))

if not opera_chromium_versions:
    # An empty mapping means no page yielded a usable section
    print("Failed to extract any versions.")
else:
    for opera_version, chromium_version in opera_chromium_versions.items():
        print(
            f"Opera Version: {opera_version}, Chromium Version: {chromium_version}")