# Roughly like this:
#########################################################
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium_stealth import stealth
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO
# Fetch the SSA "AWI series and annual changes" table with a Chrome session
# that has selenium_stealth applied, and load it into a pandas DataFrame
# indexed by the 'Year' column.

# 1. Set up Chrome options to avoid detection
options = Options()
options.add_argument("start-maximized")  # Maximize window to mimic typical user behavior
options.add_experimental_option("excludeSwitches", ["enable-automation"])  # Exclude the automation switch
options.add_experimental_option('useAutomationExtension', False)
# You may also want to add a custom, realistic user agent string for an extra layer of customization
# options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36')

# 2. Initialize the WebDriver
driver = webdriver.Chrome(options=options)
try:
    # 3. Apply selenium_stealth
    stealth(
        driver,
        languages=["en-US", "en"],
        vendor="Google Inc.",
        platform="Win32",
        webgl_vendor="Intel Inc.",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
    )

    # 4. Navigate to the SSA website and capture the rendered HTML
    driver.get("https://www.ssa.gov/oact/cola/awiseries.html")
    html_string = driver.page_source
finally:
    # Always end the browser session, even if stealth()/get() raised;
    # otherwise every run leaves a Chrome/chromedriver process behind.
    driver.quit()

# 5. Locate the AWI table and parse it with pandas
soup = BeautifulSoup(html_string, 'html.parser')
tbl = soup.find('table', attrs={'summary': "AWI series and annual changes"})
if tbl is None:
    # Without this guard the literal string "None" would be handed to
    # pd.read_html, which fails with a message unrelated to the real cause.
    # ValueError matches what pd.read_html raises when it finds no tables.
    raise ValueError(
        "AWI table not found in the page source; the page layout may have "
        "changed or the request may have been blocked"
    )

# StringIO wrapper: newer pandas versions deprecate passing literal HTML strings.
awi_df = pd.read_html(StringIO(str(tbl)))[0].set_index('Year')

# Kept as the final bare expression so a notebook/REPL still displays the
# table; in a plain script, use `awi_df` for further processing.
awi_df
#########################################################