Fix Ip2LocationWeb database implementation because of new webpage lay… · pythonthings/ip2geotools@29a7040 · GitHub
[go: up one dir, main page]

Skip to content

Commit 29a7040

Browse files
author
Tomas Caha
committed
Fix Ip2LocationWeb database implementation because of new webpage layout (selenium with Firefox required), better exception handling in Ipstack
1 parent 3f8c8fc commit 29a7040

File tree

6 files changed

+94
-83
lines changed

6 files changed

+94
-83
lines changed

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
0.1.4 - 20-Feb-2019
2+
-------------------
3+
4+
* Fix ``ip2geotools.databases.commercial.Ip2LocationWeb`` by using ``selenium`` with Firefox because of new webpage layout
5+
* Better exception handling in ``ip2geotools.databases.noncommercial.Ipstack``
6+
17
0.1.3 - 27-Nov-2018
28
-------------------
39

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Basic usage
3636
>>> response.longitude
3737
16.6182105
3838
>>> response.to_json()
39-
'{"ip_address": "147.229.2.90", "city": "Brno (Brno st\u0159ed)", "region": "South Moravian", "country": "CZ", "latitude": 49.1926824, "longitude": 16.6182105}'
39+
'{"ip_address": "147.229.2.90", "city": "Brno (Brno střed)", "region": "South Moravian", "country": "CZ", "latitude": 49.1926824, "longitude": 16.6182105}'
4040
>>> response.to_xml()
4141
'<?xml version="1.0" encoding="UTF-8" ?><ip_location><ip_address>147.229.2.90</ip_address><city>Brno (Brno střed)</city><region>South Moravian</region><country>CZ</country><latitude>49.1926824</latitude><longitude>16.6182105</longitude></ip_location>'
4242
>>> response.to_csv(',')

ip2geotools/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
__title__ = 'ip2geotools'
55
__description__ = 'Simple tool for getting geolocation information on ' + \
66
'given IP address from various geolocation databases.'
7-
__version__ = '0.1.3'
7+
__version__ = '0.1.4'
88
__author__ = 'Tomas Caha'
99
__author_email__ = 'tomas-net@seznam.cz'
1010
__url__ = 'https://github.com/tomas-net/ip2geotools'
1111
__license__ = 'MIT License'
12-
__copyright__ = 'Copyright (c) 2018 Tomas Caha'
12+
__copyright__ = 'Copyright (c) 2019 Tomas Caha'

ip2geotools/databases/commercial.py

Lines changed: 30 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
import requests
1515
from requests.auth import HTTPBasicAuth
1616
import pyquery
17+
from selenium import webdriver # selenium for Ip2LocationWeb
18+
from selenium.webdriver.firefox.options import Options
19+
from selenium.webdriver.common.by import By
20+
from selenium.webdriver.support.ui import WebDriverWait
21+
from selenium.webdriver.support import expected_conditions as EC
1722

1823
from ip2geotools.databases.interfaces import IGeoIpDatabase
1924
from ip2geotools.models import IpLocation
@@ -198,24 +203,29 @@ class Ip2LocationWeb(IGeoIpDatabase):
198203

199204
@staticmethod
200205
def get(ip_address, api_key=None, db_path=None, username=None, password=None):
201-
# initial check for current limit
206+
# initiate headless Firefox using selenium to pass through Google reCAPTCHA
207+
options = Options()
208+
options.headless = True
209+
browser = webdriver.Firefox(options=options)
210+
202211
try:
203-
request = requests.get('http://www.ip2location.com/demo/',
204-
headers={'User-Agent': 'Mozilla/5.0'},
205-
timeout=62)
206-
except:
207-
raise ServiceError()
212+
browser.get('http://www.ip2location.com/demo/' + ip_address)
213+
element = WebDriverWait(browser, 30).until(
214+
EC.presence_of_element_located((By.NAME, 'ipAddress'))
215+
)
208216

209-
# check for HTTP errors
210-
if request.status_code != 200:
217+
if not element:
218+
raise Exception
219+
except:
211220
raise ServiceError()
212221

213222
# parse current limit
214223
current_limit = 0
224+
body = browser.find_element_by_tag_name('body').text
225+
215226
try:
216-
content = request.content.decode('utf-8')
217227
limit = re.search(r'You still have.*?([\d]{1,2})/50.* query limit',
218-
content,
228+
body,
219229
re.DOTALL)
220230

221231
if limit != None:
@@ -227,46 +237,21 @@ def get(ip_address, api_key=None, db_path=None, username=None, password=None):
227237
if current_limit == 0:
228238
raise LimitExceededError()
229239

230-
# process request
231-
try:
232-
request = requests.post('http://www.ip2location.com/demo/',
233-
headers={'User-Agent': 'Mozilla/5.0'},
234-
data=[('ipAddress', ip_address)],
235-
timeout=62)
236-
except:
237-
raise ServiceError()
238-
239-
# check for HTTP errors
240-
if request.status_code != 200:
241-
raise ServiceError()
242-
243240
# parse content
244241
try:
245-
content = request.content.decode('utf-8')
246-
pq = pyquery.PyQuery(content)
247-
parsed_ip = pq('html > body > div#main.container table:first tr:contains("IP Address") td:nth-child(2)') \
248-
.text() \
249-
.strip()
250-
parsed_country = pq('html > body > div#main.container table:first tr:contains("Country") td:nth-child(2) img') \
251-
.attr('src') \
252-
.strip() \
253-
.replace('/images/flags/', '') \
254-
.replace('.png', '') \
255-
.upper()
256-
parsed_region = pq('html > body > div#main.container table:first tr:contains("Region") td:nth-child(2)') \
257-
.eq(0) \
258-
.text() \
259-
.strip()
260-
parsed_city = pq('html > body > div#main.container table:first tr:contains("City") td:nth-child(2)') \
261-
.eq(0) \
262-
.text() \
263-
.strip()
264-
parsed_coords = pq('html > body > div#main.container table:first tr:contains("Latitude & Longitude of City") td:nth-child(2)') \
265-
.text() \
266-
.strip()
242+
table = browser.find_element_by_xpath('//table[contains(.,"Permalink")]')
243+
244+
parsed_ip = table.find_element_by_xpath('//tr[contains(.,"IP Address")]/td').text.strip()
245+
parsed_country = [class_name.replace('flag-icon-', '').upper() for class_name in table.find_element_by_class_name('flag-icon').get_attribute('class').split(' ') if class_name.startswith('flag-icon-')][0]
246+
parsed_region = table.find_element_by_xpath('//tr[contains(.,"Region")]/td').text.strip()
247+
parsed_city = table.find_element_by_xpath('//tr[contains(.,"City")]/td').text.strip()
248+
parsed_coords = table.find_element_by_xpath('//tr[contains(.,"Coordinates of City")]/td').text.strip()
267249
except:
268250
raise InvalidResponseError()
269251

252+
# exit headless firefox
253+
browser.quit()
254+
270255
# check for errors
271256
if ip_address != parsed_ip:
272257
raise IpAddressNotFoundError(ip_address)

ip2geotools/databases/noncommercial.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from ip2geotools.databases.interfaces import IGeoIpDatabase
1919
from ip2geotools.models import IpLocation
2020
from ip2geotools.errors import IpAddressNotFoundError, PermissionRequiredError, \
21-
InvalidRequestError, InvalidResponseError, ServiceError
21+
InvalidRequestError, InvalidResponseError, ServiceError, \
22+
LimitExceededError
2223

2324

2425
class DbIpCity(IGeoIpDatabase):
@@ -233,12 +234,7 @@ def get(ip_address, api_key=None, db_path=None, username=None, password=None):
233234

234235
# check for HTTP errors
235236
if request.status_code != 200:
236-
if request.status_code == 404:
237-
raise IpAddressNotFoundError(ip_address)
238-
elif request.status_code == 500:
239-
raise InvalidRequestError()
240-
else:
241-
raise ServiceError()
237+
raise ServiceError()
242238

243239
# parse content
244240
try:
@@ -247,6 +243,17 @@ def get(ip_address, api_key=None, db_path=None, username=None, password=None):
247243
except:
248244
raise InvalidResponseError()
249245

246+
# check for errors
247+
if content.get('error'):
248+
if content['error']['code'] == 101 \
249+
or content['error']['code'] == 102 \
250+
or content['error']['code'] == 105:
251+
raise PermissionRequiredError()
252+
elif content['error']['code'] == 104:
253+
raise LimitExceededError()
254+
else:
255+
raise InvalidRequestError()
256+
250257
# prepare return value
251258
ip_location = IpLocation(ip_address)
252259

requirements.txt

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,41 @@
1-
astroid>=1.5.3
2-
autopep8>=1.3.3
3-
certifi>=2017.7.27.1
4-
chardet>=3.0.4
5-
click>=6.7
6-
cssselect>=1.0.1
7-
decorator>=4.1.2
8-
dicttoxml>=1.7.4
9-
geocoder>=1.32.1
10-
geoip2>=2.6.0
11-
idna>=2.6
12-
IP2Location>=8.0.0
13-
isort>=4.2.15
14-
lazy-object-proxy>=1.3.1
15-
lxml>=4.1.0
16-
maxminddb>=1.3.0
17-
mccabe>=0.6.1
18-
packaging>=16.8
19-
pip-review>=1.0
20-
pycodestyle>=2.3.1
21-
pylint>=1.7.4
22-
pyparsing>=2.2.0
23-
pyquery>=1.3.0
24-
ratelim>=0.1.6
25-
requests>=2.18.4
26-
six>=1.11.0
27-
urllib3>=1.22
28-
wrapt>=1.10.11
1+
astroid==2.1.0
2+
autopep8==1.4.3
3+
bleach==3.0.2
4+
certifi==2018.10.15
5+
chardet==3.0.4
6+
Click==7.0
7+
cssselect==1.0.3
8+
decorator==4.3.0
9+
dicttoxml==1.7.4
10+
docutils==0.14
11+
future==0.17.1
12+
geocoder==1.38.1
13+
geoip2==2.9.0
14+
idna==2.7
15+
IP2Location==8.0.3
16+
isort==4.3.4
17+
lazy-object-proxy==1.3.1
18+
lxml==4.2.5
19+
maxminddb==1.4.1
20+
mccabe==0.6.1
21+
packaging==18.0
22+
pip-review==1.0
23+
pkginfo==1.4.2
24+
pycodestyle==2.4.0
25+
Pygments==2.3.0
26+
pylint==2.2.0
27+
pyparsing==2.3.0
28+
pyquery==1.4.0
29+
ratelim==0.1.6
30+
readme-renderer==24.0
31+
requests==2.20.1
32+
requests-toolbelt==0.8.0
33+
selenium==3.141.0
34+
six==1.11.0
35+
tqdm==4.28.1
36+
twine==1.12.1
37+
typed-ast==1.1.0
38+
typing==3.6.6
39+
urllib3==1.24.1
40+
webencodings==0.5.1
41+
wrapt==1.10.11

0 commit comments

Comments
 (0)
0