Files
Chrome2Kindle/server/HtmlParser.py
Fergal Moran f8c6e8da3d Initial upload
2010-10-21 21:47:26 +01:00

30 lines
854 B
Python

#!/usr/bin/python
from BeautifulSoup import BeautifulSoup
from urlparse import urlsplit
import urllib2
import logging
class HtmlParser:
    """Download a web page and expose a few pieces of its parsed HTML.

    The page at ``url`` is fetched once, in the constructor, and parsed
    with BeautifulSoup; the other methods only query the parsed tree.
    """

    def __init__(self, url):
        """Fetch ``url`` and parse the response body.

        Args:
            url: Address of the page to download (passed to
                ``urllib2.urlopen`` unchanged).

        Raises:
            Whatever ``urllib2.urlopen`` / ``read`` raise on failure; no
            network error is handled here.
        """
        self._serverUrl = url
        response = urllib2.urlopen(url)
        try:
            self._pageContent = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        self._soup = BeautifulSoup(self._pageContent)

    def Title(self):
        """Return the ``string`` of the page's ``<title>`` tag.

        Returns:
            The value of ``html.head.title.string`` from the parsed tree,
            or ``None`` when any element of that chain is missing.
        """
        try:
            return self._soup.html.head.title.string
        except AttributeError:
            # A missing <html>, <head> or <title> yields None somewhere in
            # the chain, and the next attribute access on it fails.
            return None

    def ScrapeImages(self):
        """Log and collect the ``src`` of every ``<img>`` tag on the page.

        Tags without a ``src`` attribute are skipped rather than aborting
        the whole scan.

        Returns:
            A list of the ``src`` values, in document order.
        """
        imgTags = self._soup.findAll('img')
        logging.info('Opened the page...')
        imgUrls = []
        for imgTag in imgTags:
            imgUrl = imgTag.get('src')
            if imgUrl is None:
                continue
            logging.info(imgUrl)
            imgUrls.append(imgUrl)
        logging.info('Finished parsing...')
        logging.info('Web page retrieved....')
        return imgUrls