mirror of
https://github.com/fergalmoran/Chrome2Kindle.git
synced 2025-12-22 17:49:14 +00:00
30 lines
854 B
Python
30 lines
854 B
Python
#!/usr/bin/python
|
|
from BeautifulSoup import BeautifulSoup
|
|
from urlparse import urlsplit
|
|
import urllib2
|
|
|
|
import logging
|
|
class HtmlParser:
|
|
def __init__(self, url):
|
|
self._serverUrl = url
|
|
self._pageContent = urllib2.urlopen(url).read()
|
|
self._soup = BeautifulSoup(''.join(self._pageContent))
|
|
|
|
def Title(self):
|
|
return self._soup.html.head.title.string
|
|
|
|
def ScrapeImages(self):
|
|
try:
|
|
imgTags = self._soup.findAll('img')
|
|
logging.info ('Opened the page...')
|
|
for imgTag in imgTags:
|
|
imgUrl = imgTag['src']
|
|
logging.info(imgUrl)
|
|
|
|
logging.info('Finished parsing...')
|
|
|
|
except ImportError:
|
|
logging.error('Beautiful Soup is not installed')
|
|
else:
|
|
logging.info('Web page retrieved....')
|