Scrape Hacktoberfest Events 2020 fixes #16 · jiyoungsin/python-scripts@0cae825 · GitHub

Commit 0cae825

Scrape Hacktoberfest Events 2020 fixes #16
1 parent cb448c2 commit 0cae825

File tree: 1 file changed (+55, -0 lines changed)

Lines changed: 55 additions & 0 deletions
import requests
import pandas
from bs4 import BeautifulSoup

# Fetch the events page and build a soup object from the response HTML.
url = "https://hacktoberfest.digitalocean.com/events"
response = requests.get(url)
html = response.text
soup = BeautifulSoup(html, "html.parser")

# Lists that will hold the scraped data.
all_names = []
all_locations = []
all_dates = []
all_time_zones = []
all_urls = []

# Iterate over all the "tr" elements marked as past events.
for tr_element in soup.find_all("tr", attrs={"class": "past"}):

    # For each tr element, find the relevant cell and append its value to the matching list.
    name_element = tr_element.find("td", attrs={"class": "event_name"})
    name = name_element.text.strip()
    all_names.append(name)

    location_element = tr_element.find("td", attrs={"class": "location"})
    location = location_element.text.strip()
    all_locations.append(location)

    date_element = tr_element.find("td", attrs={"data-label": "date"})
    date = date_element.text.strip()
    all_dates.append(date)

    time_zone_element = tr_element.find("td", attrs={"data-label": "zone"})
    time_zone = time_zone_element.text.strip()
    all_time_zones.append(time_zone)

    url_element = tr_element.find("a", attrs={"class": "emphasis"})
    url = url_element['href']
    all_urls.append(url)

# Set up the columns of the Comma-Separated Values file.
csv_name = "events.csv"
csv_structure = {
    "Name": all_names,
    "Location": all_locations,
    "Date": all_dates,
    "Time Zone": all_time_zones,
    "URL": all_urls,
}

# Create the CSV from a pandas DataFrame.
data_frame = pandas.DataFrame(csv_structure)
data_frame.to_csv(csv_name, index=False, encoding='utf-8')
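A quick way to confirm the scrape worked is to load the generated events.csv back with pandas. The snippet below is a minimal sketch, assuming the script above ran without errors and wrote the five columns defined in csv_structure.

import pandas

# Hypothetical sanity check: read the CSV written by the scraper and inspect it.
events = pandas.read_csv("events.csv")
print(events.columns.tolist())  # expected: ['Name', 'Location', 'Date', 'Time Zone', 'URL']
print(len(events))              # number of past events that were scraped
print(events.head())            # first few rows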
