jaaks
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎section11/projects/scraping-books/app.py
Lines changed: 32 additions & 0 deletions b/‎section11/projects/scraping-books/app.py
Lines changed: 32 additions & 0 deletions
diff --git a/‎section11/projects/scraping-books/locators/__init__.py b/‎section11/projects/scraping-books/locators/__init__.py
diff --git a/‎section11/projects/scraping-books/locators/all_books_page.py
Lines changed: 3 additions & 0 deletions b/‎section11/projects/scraping-books/locators/all_books_page.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎section11/projects/scraping-books/locators/book_locators.py
Lines changed: 11 additions & 0 deletions b/‎section11/projects/scraping-books/locators/book_locators.py
Lines changed: 11 additions & 0 deletions
diff --git a/‎section11/projects/scraping-books/menu.py
Lines changed: 59 additions & 0 deletions b/‎section11/projects/scraping-books/menu.py
Lines changed: 59 additions & 0 deletions
diff --git a/‎section11/projects/scraping-books/pages/__init__.py b/‎section11/projects/scraping-books/pages/__init__.py
diff --git a/‎section11/projects/scraping-books/pages/all_books_page.py
Lines changed: 30 additions & 0 deletions b/‎section11/projects/scraping-books/pages/all_books_page.py
Lines changed: 30 additions & 0 deletions
diff --git a/‎section11/projects/scraping-books/parsers/__init__.py b/‎section11/projects/scraping-books/parsers/__init__.py
diff --git a/‎section11/projects/scraping-books/parsers/book.py
Lines changed: 72 additions & 0 deletions b/‎section11/projects/scraping-books/parsers/book.py
Lines changed: 72 additions & 0 deletions
@@ -1,3 +1,4 @@
+logs.txt
 data.db
 *.key
 *.indd
 
@@ -0,0 +1,32 @@
+import requests
+import logging
+
+from pages.all_books_page import AllBooksPage
+
+# Write every record at INFO level or above to logs.txt, tagged with a
+# timestamp, the level and the source file/line that emitted it.
+logging.basicConfig(format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
+                    datefmt='%d-%m-%Y %H:%M:%S',
+                    level=logging.INFO,
+                    filename='logs.txt')
+logger = logging.getLogger('scraping')
+
+# Seconds to wait on the server. Without a timeout, requests.get() can block
+# forever when the host stops responding.
+REQUEST_TIMEOUT = 10
+
+
+def _fetch(url):
+    """Return the raw body of `url`.
+
+    Raises requests.HTTPError on a 4xx/5xx reply, so an error page is never
+    handed to the HTML parser as if it were a catalogue page.
+    """
+    logger.info(f'Requesting {url}')
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    return response.content
+
+
+print('Loading books list...')
+logger.info('Loading books list.')
+
+# The landing page is fetched only to read how many catalogue pages exist.
+page_content = _fetch('http://books.toscrape.com')
+
+logger.debug('Creating AllBooksPage from page content.')
+page = AllBooksPage(page_content)
+
+_books = []
+
+logger.info(f'Going through {page.page_count} pages of books...')
+# Catalogue pages are numbered from 1, hence `page_num+1` in the URL.
+for page_num in range(page.page_count):
+    url = f'http://books.toscrape.com/catalogue/page-{page_num+1}.html'
+    page_content = _fetch(url)
+    logger.debug('Creating AllBooksPage from page content.')
+    page = AllBooksPage(page_content)
+    _books.extend(page.books)
+
+# Public name imported by menu.py: the parsers for every book found.
+books = _books
@@ -0,0 +1,3 @@
+class AllBooksPageLocators:
+    """CSS selectors used by AllBooksPage to find elements on a listing page."""
+    # Matches one element per book shown on the page.
+    BOOKS = 'div.page_inner section li.col-xs-6.col-sm-4.col-md-3.col-lg-3'
+    # Matches the pager element; its text is parsed as "Page X of Y".
+    PAGER = 'div.page_inner section ul.pager li.current'
@@ -0,0 +1,11 @@
+class BookLocators:
+    """
+    Locators for an item in the HTML page.
+
+    This allows us to easily see what our code will be looking at
+    as well as change it quickly if we notice it is now different.
+    """
+    # Anchor whose `title` attribute is read as the book name.
+    NAME_LOCATOR = 'article.product_pod h3 a'
+    # Same anchor; its `href` attribute is read as the link to the book page.
+    LINK_LOCATOR = 'article.product_pod h3 a'
+    # Paragraph whose text contains the price, written as '£' plus a decimal number.
+    PRICE_LOCATOR = 'article.product_pod p.price_color'
+    # Paragraph whose CSS class other than `star-rating` names the rating.
+    RATING_LOCATOR = 'article.product_pod p.star-rating'
@@ -0,0 +1,59 @@
+import logging
+
+from app import books
+
+# Logger for the interactive menu module.
+logger = logging.getLogger('scraping.menu')
+
+
+# Prompt shown by menu() every time it reads the user's choice.
+USER_CHOICE = '''Enter one of the following
+
+- 'b' to look at 5-star books
+- 'c' to look at the cheapest books
+- 'n' to just get the next available book on the page
+- 'q' to exit
+
+Enter your choice: '''
+
+
+def print_best_books():
+    """Print up to five books, ordered from the highest rating downwards."""
+    logger.debug('Finding best books by rating...')
+    # Negating the rating sorts descending while keeping ties in input order.
+    ranked = sorted(books, key=lambda book: -book.rating)
+    for top_book in ranked[:5]:
+        print(top_book)
+
+
+def print_cheapest_books():
+    """Print up to five books, ordered from the lowest price upwards."""
+    logger.debug('Finding best books by price...')
+    by_price = sorted(books, key=lambda book: book.price)
+    for cheap_book in by_price[:5]:
+        print(cheap_book)
+
+
+# One shared generator over all books, so each 'n' choice continues from
+# where the previous one stopped.
+books_generator = (x for x in books)
+
+
+def get_next_book():
+    """Print the next book from the shared generator.
+
+    Once every book has been shown, prints a notice instead of letting
+    StopIteration propagate and terminate the menu loop.
+    """
+    logger.debug('Getting next book from generator of all books...')
+    try:
+        book = next(books_generator)
+    except StopIteration:
+        print('No more books available.')
+    else:
+        # Printed outside the `try` so errors raised while formatting the
+        # book are not mistaken for the generator being exhausted.
+        print(book)
+
+
+# Dispatch table: menu key -> function to run for that key.
+user_choices = {
+    'b': print_best_books,
+    'c': print_cheapest_books,
+    'n': get_next_book,
+}
+
+
+def menu():
+    """Prompt for commands until the user enters 'q'.
+
+    Each recognised key runs its handler from `user_choices`; anything else
+    prints a hint and prompts again.
+    """
+    while True:
+        choice = input(USER_CHOICE)
+        if choice == 'q':
+            break
+        logger.debug('User did not choose to exit program.')
+        handler = user_choices.get(choice)
+        if handler is None:
+            print('Please choose a valid command.')
+        else:
+            handler()
+    logger.debug('Terminating program...')
+
+
+# Start the interactive loop as soon as the module is executed.
+menu()
@@ -0,0 +1,30 @@
+import re
+import logging
+
+from locators.all_books_page import AllBooksPageLocators
+from parsers.book import BookParser
+from bs4 import BeautifulSoup
+
+logger = logging.getLogger('scraping.all_books_page')
+
+
+class AllBooksPage:
+    """Parsed view of one listing page: its books and the total page count."""
+
+    def __init__(self, page):
+        """Parse `page` (raw HTML content) into a BeautifulSoup tree."""
+        logger.debug('Parsing page content with BeautifulSoup HTML parser.')
+        self.soup = BeautifulSoup(page, 'html.parser')
+
+    @property
+    def books(self):
+        """Return a list with one BookParser per book element on the page."""
+        logger.debug(f'Finding all books in the page using `{AllBooksPageLocators.BOOKS}`')
+        book_tags = self.soup.select(AllBooksPageLocators.BOOKS)
+        return list(map(BookParser, book_tags))
+
+    @property
+    def page_count(self):
+        """Return the total number of pages read from the pager text, as an int."""
+        logger.debug('Finding all number of catalogue pages available...')
+        pager_text = self.soup.select_one(AllBooksPageLocators.PAGER).string
+        logger.info(f'Found number of catalogue pages available: `{pager_text}`')
+        # The pager text has the form "Page X of Y"; capture Y.
+        total = int(re.search('Page [0-9]+ of ([0-9]+)', pager_text).group(1))
+        logger.info(f'Extracted number of pages as integer: `{total}`.')
+        return total
@@ -0,0 +1,72 @@
+import re
+import logging
+
+from locators.book_locators import BookLocators
+
+logger = logging.getLogger('scraping.book_parser')
+
+
+class BookParser:
+    """
+    Wrap the HTML element of one book and expose its details as properties.
+
+    `parent` must support `select_one` with the CSS selectors defined in
+    BookLocators. Each property queries the element again on every access;
+    nothing is cached.
+    """
+
+    # Maps the rating word found in the element's CSS classes to its number.
+    RATINGS = {
+        'One': 1,
+        'Two': 2,
+        'Three': 3,
+        'Four': 4,
+        'Five': 5
+    }
+
+    def __init__(self, parent):
+        """Store `parent`, the element that contains a single book."""
+        logger.debug(f'New book parser created from `{parent}`')
+        self.parent = parent
+
+    def __repr__(self):
+        """Return a short summary with the book's name, price and rating."""
+        return f'<Book {self.name} {self.price}, {self.rating} stars>'
+
+    @property
+    def name(self):
+        """Return the book name, read from the anchor's `title` attribute."""
+        logger.debug('Finding book name...')
+        locator = BookLocators.NAME_LOCATOR
+        item_name = self.parent.select_one(locator).attrs['title']
+        logger.info(f'Found book name, `{item_name}`.')
+        return item_name
+
+    @property
+    def link(self):
+        """Return the book page link, read from the anchor's `href` attribute."""
+        logger.debug('Finding book page link...')
+        locator = BookLocators.LINK_LOCATOR
+        item_url = self.parent.select_one(locator).attrs['href']
+        logger.info(f'Found book page link, `{item_url}`.')
+        return item_url
+
+    @property
+    def price(self):
+        """Return the price as a float, parsed from the price element's text."""
+        logger.debug('Finding book price...')
+        locator = BookLocators.PRICE_LOCATOR
+        item_price = self.parent.select_one(locator).string
+        logger.debug(f'Item price element found, `{item_price}`')
+
+        # Raw string: in a plain literal '\.' is an invalid escape sequence,
+        # which recent Python versions warn about at compile time.
+        pattern = r'£([0-9]+\.[0-9]+)'
+        matcher = re.search(pattern, item_price)
+        price = float(matcher.group(1))
+        logger.info(f'Found book price, `{price}`.')
+        return price
+
+    @property
+    def rating(self):
+        """Return the rating as an int from 1 to 5.
+
+        Returns None when the rating class is not a key of RATINGS.
+        """
+        logger.debug('Finding book rating...')
+        locator = BookLocators.RATING_LOCATOR
+        star_rating_element = self.parent.select_one(locator)
+        classes = star_rating_element.attrs['class']
+        # The first class other than `star-rating` is the rating word.
+        rating_class = next(c for c in classes if c != 'star-rating')
+
+        logger.debug(f'Found rating class, `{rating_class}`.')
+        logger.debug('Converting to integer for sorting.')
+        rating = BookParser.RATINGS.get(rating_class)
+        logger.info(f'Found book rating, `{rating}`.')
+        return rating
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+logs.txt`
`1`	`2`	`data.db`
`2`	`3`	`*.key`
`3`	`4`	`*.indd`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+class AllBooksPageLocators:`
	`2`	`+ BOOKS = 'div.page_inner section li.col-xs-6.col-sm-4.col-md-3.col-lg-3'`
	`3`	`+ PAGER = 'div.page_inner section ul.pager li.current'`