8000
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 61de19b, commit dc43586 (Copy full SHA for dc43586)
experiments/web.py
@@ -0,0 +1,30 @@
1
+from bs4 import BeautifulSoup
2
+import csv
3
+import json
4
+import requests
5
+
6
7
def main(url='http://yahoo.com', output_path="headlines-output.csv",
         min_length=10, timeout=10):
    """Scrape ``<h3>`` headlines from a web page, print them, and save them.

    The page at *url* is fetched and parsed with BeautifulSoup's
    ``html.parser``. The stripped text of every ``<h3>`` element that is at
    least *min_length* characters long is collected, printed to stdout as a
    JSON array, and written to *output_path* as a one-column CSV with a
    ``headline`` header row.

    Args:
        url: Address of the page to scrape.
        output_path: Destination of the CSV file (overwritten if present).
        min_length: Minimum stripped length for an ``<h3>`` text to be
            kept; shorter texts are skipped.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the CSV file cannot be written.
    """
    # requests has no default timeout; without one a stalled server would
    # block this script forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    stripped_texts = (tag.get_text().strip() for tag in soup.find_all("h3"))
    headlines = [text for text in stripped_texts if len(text) >= min_length]

    print(json.dumps(headlines))

    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear on Windows); utf-8 keeps non-ASCII headlines writable
    # regardless of the platform's locale encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file, delimiter=',')
        writer.writerow(['headline'])
        writer.writerows([headline] for headline in headlines)
28
29
# Run the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
0 commit comments