You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
{"payload":{"allShortcutsEnabled":false,"fileTree":{"":{"items":[{"name":"scrapy_scraping","path":"scrapy_scraping","contentType":"directory"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"requests-beautifulsoup-scraper.py","path":"requests-beautifulsoup-scraper.py","contentType":"file"},{"name":"selenium-scraper.py","path":"selenium-scraper.py","contentType":"file"},{"name":"titles.csv","path":"titles.csv","contentType":"file"},{"name":"titles.json","path":"titles.json","contentType":"file"}],"totalCount":6}},"fileTreeProcessingTime":7.919051,"foldersToFetch":[],"incompleteFileTree":false,"repo":{"id":910415592,"defaultBranch":"main","name":"Python-web-scraping","ownerLogin":"luminati-io","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2024-12-31T07:59:38.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/19207323?v=4","public":true,"private":false,"isOrgOwned":true},"codeLineWrapEnabled":false,"symbolsExpanded":false,"treeExpanded":true,"refInfo":{"name":"main","listCacheKey":"v0:1735631979.826916","canEdit":false,"refType":"branch","currentOid":"0e51cb49162675938357832d37b5b581beee5d07"},"path":"selenium-scraper.py","currentUser":null,"blob":{"rawLines":["from selenium import webdriver\r","from selenium.webdriver.chrome.service import Service\r","from selenium.webdriver.chrome.options import Options\r","from selenium.webdriver.common.by import By\r","import csv\r","\r","# Set up the WebDriver that operates in headless mode\r","options = Options()\r","options.add_argument(\"--headless\")\r","driver = webdriver.Chrome(service=Service(), options=options)\r","\r","# URL of the page to scrape\r","url = \"https://en.wikipedia.org/wiki/Web_scraping\"\r","\r","# Open the URL in the browser\r","driver.get(url)\r","\r","# List to store the scraped titles\r","titles = []\r","\r","# List of header levels (h1, h2, h3, h4, h5)\r","title_level_list = [1, 2, 3, 4, 5]\r","\r","# Loop through each header level (h1, h2, h3, h4, h5)\r","for title_level in title_level_list:\r"," # Find all elements of the current header level using a CSS Selector\r"," title_elements = driver.find_elements(By.CSS_SELECTOR, f\"h{title_level}\")\r","\r"," # Loop through each title element found\r"," for title_element in title_elements:\r"," # Data extraction logic\r"," tag = title_element.tag_name\r"," text = title_element.text\r","\r"," # Create a dictionary with the tag and the title text\r"," title = {\r"," \"tag\": tag,\r"," \"title\": text,\r"," }\r","\r"," # Append the dictionary to the titles list\r"," titles.append(title)\r","\r","# Close the browser\r","driver.quit()\r","\r","# Open a CSV file to write the data\r","with open(\"titles.csv\", mode=\"w\", newline=\"\", encoding=\"utf-8\") as file:\r"," # Create a CSV writer object and specify the fieldnames (columns)\r"," writer = csv.DictWriter(file, fieldnames=[\"tag\", \"title\"])\r","\r"," # Write the header (column names) to the CSV file\r"," writer.writeheader()\r","\r"," # Write each row (dictionary) to the CSV file\r"," for row in titles:\r"," writer.writerow(row)\r"],"stylingDirectives":null,"colorizedLines":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":false,"configFilePath":null,"networkDependabotPath":"/luminati-io/Python-web-scraping/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":null},"displayName":"selenium-scraper.py","displayUrl":"https://github.com/luminati-io/Python-web-scraping/blob/main/selenium-scraper.py?raw=true","headerInfo":{"blobSize":"1.75 KB","deleteTooltip":"You must be signed in to make or propose changes","editTooltip":"You must be signed in to make or propose changes","ghDesktopPath":"https://desktop.github.com","isGitLfs":false,"onBranch":true,"shortPath":"0fd7cd9","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2Fluminati-io%2FPython-web-scraping%2Fblob%2Fmain%2Fselenium-scraper.py","isCSV":false,"isRichtext":false,"toc":null,"lineInfo":{"truncatedLoc":"57","truncatedSloc":"44"},"mode":"file"},"image":false,"isCodeownersFile":null,"isPlain":false,"isValidLegacyIssueTemplate":false,"issueTemplate":null,"discussionTemplate":null,"language":"Python","languageID":303,"large":false,"planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/luminati-io/Python-web-scraping/blob/main/selenium-scraper.py","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","releasePath":"/luminati-io/Python-web-scraping/releases/new?marketplace=true","showPublishActionBanner":false},"rawBlobUrl":"https://github.com/luminati-io/Python-web-scraping/raw/refs/heads/main/selenium-scraper.py","renderImageOrRaw":false,"richText":null,"renderedFileInfo":null,"shortPath":null,"symbolsEnabled":true,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":null},"copilotInfo":null,"copilotAccessAllowed":false,"modelsAccessAllowed":false,"modelsRepoIntegrationEnabled":false,"csrf_tokens":{"/luminati-io/Python-web-scraping/branches":{"post":"ap_T1JahDBXJYGuPTISILKbxN4u1GeztoZeD1NlRvo4Bx9VOBeQo8n1mRiEq2BIWREt3c32fuY1_spJKxu1diA"},"/repos/preferences":{"post":"3uIpauXr00EJNE-8GHo7o9cgd7UvsSqKoFeH9SrRPoFwv2A1UIoEwAX2gUui2ES-GRzjgEsBnFKs_tTgpCaWQg"}}},"title":"Python-web-scraping/selenium-scraper.py at main · luminati-io/Python-web-scraping","appPayload":{"helpUrl":"https://docs.github.com","findFileWorkerPath":"/assets-cdn/worker/find-file-worker-263cab1760dd.js","findInFileWorkerPath":"/assets-cdn/worker/find-in-file-worker-b84e9496fc59.js","githubDevUrl":null,"enabled_features":{"code_nav_ui_events":false,"react_blob_overlay":false,"accessible_code_button":true}}}