8000 new update · Brivno1234/python-scripts@89a2b66 · GitHub
[go: up one dir, main page]

Skip to content

Commit 89a2b66

Browse files
authored
new update
1 parent c507ba8 commit 89a2b66

File tree

1 file changed

+17
-21
lines changed

1 file changed

+17
-21
lines changed

scripts/08_basic_email_web_crawler.py

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,17 @@
1-
import requests
2-
import re
3-
# Patterns are raw strings so that `\w` and `\.` reach the regex engine
# untouched instead of being treated as (invalid) Python string escapes.
LINK_PATTERN = re.compile(r'"((http|ftp)s?://.*?)"')
EMAIL_PATTERN = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')

# Seconds to wait for the server before giving up; without a timeout
# `requests.get` can block indefinitely.
REQUEST_TIMEOUT = 10


def extract_links(html):
    """Return every double-quoted http(s)/ftp(s) URL found in *html*.

    Args:
        html: The page source to scan.

    Returns:
        A list of URL strings, in order of appearance (duplicates kept).
    """
    # The pattern has a nested group for the scheme, so take group 1 (the
    # whole URL) explicitly rather than the (url, scheme) tuples that
    # `findall` would produce.
    return [match.group(1) for match in LINK_PATTERN.finditer(html)]


def extract_emails(html):
    """Return every email-like string found in *html*.

    Args:
        html: The page source to scan.

    Returns:
        A list of matched strings, in order of appearance (duplicates kept).
    """
    return EMAIL_PATTERN.findall(html)


def main():
    """Prompt for a URL, download the page and report its links and emails.

    Raises:
        SystemExit: If the page cannot be downloaded (connection problem,
            timeout, or an HTTP error status).
    """
    # get url
    url = input('Enter a URL (include `http://`): ')

    # connect to the url
    try:
        website = requests.get(url, timeout=REQUEST_TIMEOUT)
        website.raise_for_status()
    except requests.RequestException as err:
        raise SystemExit("Could not fetch {}: {}".format(url, err)) from err

    # read html
    html = website.text

    links = extract_links(html)
    emails = extract_emails(html)

    # Report both counts: the listing below is of emails, so a links-only
    # header would mislabel it.
    print("\nFound {} links".format(len(links)))
    print("Found {} emails".format(len(emails)))
    for email in emails:
        print(email)


if __name__ == "__main__":
    main()
1+
from bs4 import BeautifulSoup
2+
import requests
3+
4+
5+
6+
# Seconds to wait for the server before giving up; without a timeout
# `requests.get` can block indefinitely.
REQUEST_TIMEOUT = 10


def find_anchor_tags(html):
    """Parse *html* and return its ``<a>`` tags that have an ``href``.

    Args:
        html: The page source to parse.

    Returns:
        A plain list of the matching tag objects, in document order.
    """
    soup = BeautifulSoup(html, "html.parser")
    return list(soup.find_all("a", href=True))


def main():
    """Prompt for a URL, download the page and print each anchor tag found.

    Raises:
        SystemExit: If the page cannot be downloaded (connection problem,
            timeout, or an HTTP error status).
    """
    url = input('Enter a URL (include `http://`): ')

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        raise SystemExit("Could not fetch {}: {}".format(url, err)) from err

    html = response.text
    # Echo the raw page source before listing the anchors.
    print(html)

    # Named `anchors` rather than `list` so the builtin is not shadowed.
    anchors = find_anchor_tags(html)
    for anchor in anchors:
        print("leited kink: ", anchor)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)
0