8000 complete-python-course/course_contents/11_web_scraping at master · RITIKAPANDIT/complete-python-course · GitHub
[go: up one dir, main page]

Skip to content
< 4AAF script type="application/json" data-target="react-app.embeddedData">{"payload":{"allShortcutsEnabled":false,"path":"course_contents/11_web_scraping","repo":{"id":578279463,"defaultBranch":"master","name":"complete-python-course","ownerLogin":"RITIKAPANDIT","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2022-12-14T17:18:08.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/120336021?v=4","public":true,"private":false,"isOrgOwned":false},"currentUser":null,"refInfo":{"name":"master","listCacheKey":"v0:1671041131.162434","canEdit":false,"refType":"branch","currentOid":"405e8db04d3390e75c2bc99ba2677f22dce28194"},"tree":{"items":[{"name":"projects","path":"course_contents/11_web_scraping/projects","contentType":"directory"},{"name":"README.md","path":"course_contents/11_web_scraping/README.md","contentType":"file"}],"templateDirectorySuggestionUrl":null,"readme":{"displayName":"README.md","richText":"\u003carticle class=\"markdown-body entry-content container-lg\" itemprop=\"text\"\u003e\u003cmarkdown-accessiblity-table\u003e\u003ctable\u003e\n \u003cthead\u003e\n \u003ctr\u003e\n \u003cth\u003egroup\u003c/th\u003e\n \u003cth\u003ehidden\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003ctd\u003e\u003cdiv dir=\"auto\"\u003ePractical Python\u003c/div\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cdiv dir=\"auto\"\u003etrue\u003c/div\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\u003c/markdown-accessiblity-table\u003e\n\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eWeb Scraping\u003c/h1\u003e\u003ca id=\"user-content-web-scraping\" class=\"anchor\" aria-label=\"Permalink: Web Scraping\" href=\"#web-scraping\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIn this section we look at web scraping using Python and the \u003ccode\u003erequests\u003c/code\u003e library.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst we learn about HTML and its structure, and how we can let Python understand it.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThen we build two scraper projects using \u003ccode\u003eBeautifulSoup4\u003c/code\u003e.\u003c/p\u003e\n\u003c/article\u003e","errorMessage":null,"headerInfo":{"toc":[{"level":1,"text":"Web Scraping","anchor":"web-scraping","htmlText":"Web Scraping"}],"siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FRITIKAPANDIT%2Fcomplete-python-course%2Ftree%2Fmaster%2Fcourse_contents%2F11_web_scraping"}},"totalCount":2,"showBranchInfobar":true},"fileTree":{"course_contents":{"items":[{"name":"10_advanced_python","path":"course_contents/10_advanced_python","contentType":"directory"},{"name":"11_web_scraping","path":"course_contents/11_web_scraping","contentType":"directory"},{"name":"12_browser_automation_selenium","path":"course_contents/12_browser_automation_selenium","contentType":"directory"},{"name":"13_async_development","path":"course_contents/13_async_development","contentType":"directory"},{"name":"14_managing_projects_pipenv","path":"course_contents/14_managing_projects_pipenv","contentType":"directory"},{"name":"15_flask","path":"course_contents/15_flask","contentType":"directory"},{"name":"16_interacting_with_apis","path":"course_contents/16_interacting_with_apis","contentType":"directory"},{"name":"17_decorators","path":"course_contents/17_decorators","contentType":"directory"},{"name":"18_advanced_oop","path":"course_contents/18_advanced_oop","contentType":"directory"},{"name":"19_gui_development_tkinter","path":"course_contents/19_gui_development_tkinter","contentType":"directory"},{"name":"1_intro","path":"course_contents/1_intro","contentType":"directory"},{"name":"20_unit_testing","path":"course_contents/20_unit_testing","contentType":"directory"},{"name":"21_algorithms_data_structures","path":"course_contents/21_algorithms_data_structures","contentType":"directory"},{"name":"22_popular_libraries","path":"course_contents/22_popular_libraries","contentType":"directory"},{"name":"2_intro_to_python","path":"course_contents/2_intro_to_python","contentType":"directory"},{"name":"3_first_milestone_project","path":"course_contents/3_first_milestone_project","contentType":"directory"},{"name":"4_object_oriented_programming","path":"course_contents/4_object_oriented_programming","contentType":"directory"},{"name":"5_errors","path":"course_contents/5_errors","contentType":"directory"},{"name":"6_files","path":"course_contents/6_files","contentType":"directory"},{"name":"7_second_milestone_project","path":"course_contents/7_second_milestone_project","contentType":"directory"},{"name":"8_type_hinting","path":"course_contents/8_type_hinting","contentType":"directory"},{"name":"9_advanced_built_in_functions","path":"course_contents/9_advanced_built_in_functions","contentType":"directory"},{"name":"assets","path":"course_contents/assets","contentType":"directory"},{"name":"index-page-contents.md","path":"course_contents/index-page-contents.md","contentType":"file"}],"totalCount":24},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":".templates","path":".templates","contentType":"directory"},{"name":"assets","path":"assets","contentType":"directory"},{"name":"course_contents","path":"course_contents","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"static","path":"static","contentType":"directory"},{"name":".flake8","path":".flake8","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".pre-commit-config.yaml","path":".pre-commit-config.yaml","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"algolia.config.json","path":"algolia.config.json","contentType":"file"},{"name":"babel.config.js","path":"babel.config.js","contentType":"file"},{"name":"docusaurus.config.js","path":"docusaurus.config.js","contentType":"file"},{"name":"package-lock.json","path":"package-lock.json","contentType":"file"},{"name":"package.json","path":"package.json","contentType":"file"},{"name":"sidebars.js","path":"sidebars.js","contentType":"file"}],"totalCount":17}},"fileTreeProcessingTime":16.301425000000002,"foldersToFetch":[],"treeExpanded":true,"symbolsExpanded":false,"csrf_tokens":{"/RITIKAPANDIT/complete-python-course/branches":{"post":"xmE9sL0j8dfdecx0xAx5SB9eO-i4COp6YNCYP2qkJ8vofnEImB4Qw0W15cUoFem6_IRwnAEzgJXYULhQGxF_sA"},"/RITIKAPANDIT/complete-python-course/branches/fetch_and_merge/master":{"post":"nHiqncqsDYi7MwbGw0aAwglzOnj8LoSNq4ZChiDimk73Appf97cOlC2LyOP8oW_6IuxP_MTajpo42fXPk6Tsrg"},"/RITIKAPANDIT/complete-python-course/branches/fetch_and_merge/master?discard_changes=true":{"post":"bqU_uDMITAfj2K5eX4GUk_49yNfQUXoSYPefRY7uzGsF3w96DhNPG3VgYHtgZnur1aK9U-ilcAXzqCgMPai6iw"}}},"title":"complete-python-course/course_contents/11_web_scraping at master · RITIKAPANDIT/complete-python-course","appPayload":{"helpUrl":"https://docs.github.com","findFileWorkerPath":"/assets-cdn/worker/find-file-worker-7d7eb7c71814.js","findInFileWorkerPath":"/assets-cdn/worker/find-in-file-worker-1ae9fa256942.js","githubDevUrl":null,"enabled_features":{"code_nav_ui_events":false,"react_blob_overlay":false,"accessible_code_button":true,"github_models_repo_integration":false}}}
0