Create a function to parse Amazon URLs and reduce code redundancy.

Add a test for Amazon URL parsing.
This commit is contained in:
Laurent Monin
2013-06-30 17:25:15 +02:00
parent d729e205cf
commit f892444998
5 changed files with 64 additions and 21 deletions

View File

@@ -43,9 +43,6 @@ FPCALC_NAMES = ['fpcalc', 'pyfpcalc']
# Various Artists MBID
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'
# Amazon asin url
AMAZON_ASIN_URL_REGEX = re.compile(r'^http://(?:www.)?(.*?)(?:\:[0-9]+)?/.*/([0-9B][0-9A-Z]{9})(?:[^0-9A-Z]|$)')
# Release formats
RELEASE_FORMATS = {
u'CD': N_('CD'),

View File

@@ -26,9 +26,8 @@ import traceback
import picard.webservice
from picard import config, log
from picard.const import AMAZON_ASIN_URL_REGEX
from picard.metadata import Metadata, is_front_image
from picard.util import partial, mimetype
from picard.util import partial, mimetype, parse_amazon_url
from PyQt4.QtCore import QUrl, QObject
# data transliterated from the perl stuff used to find cover art for the
@@ -282,17 +281,15 @@ def _process_url_relation(try_list, relation):
return False
def _process_asin_relation(try_list, relation):
    """Queue Amazon cover image URLs for an 'amazon asin' URL relation.

    The text of the relation's first target is parsed with
    parse_amazon_url(). When it is not recognised as an Amazon product URL,
    nothing is queued. Otherwise one image URL per size code ('L' first, then
    'M' -- presumably large and medium) is handed to
    _try_list_append_image_url() together with try_list.
    """
    amz = parse_amazon_url(relation.target[0].text)
    if amz is None:
        return

    # Hosts without a dedicated AMAZON_SERVER entry use the amazon.com one.
    server_key = amz['host'] if amz['host'] in AMAZON_SERVER else 'amazon.com'
    server_info = AMAZON_SERVER[server_key]
    host = server_info['server']

    for size in ('L', 'M'):
        path = AMAZON_IMAGE_PATH % (amz['asin'], server_info['id'], size)
        _try_list_append_image_url(try_list, QUrl("http://%s:%s" % (host, path)))

View File

@@ -19,8 +19,8 @@
import re
from picard import config
from picard.util import format_time, translate_from_sortname
from picard.const import RELEASE_FORMATS, AMAZON_ASIN_URL_REGEX
from picard.util import format_time, translate_from_sortname, parse_amazon_url
from picard.const import RELEASE_FORMATS
_artist_rel_types = {
@@ -98,12 +98,11 @@ def _relations_to_metadata(relation_lists, m):
work_to_metadata(relation.work[0], m)
elif relation_list.target_type == 'url':
for relation in relation_list.relation:
if relation.type == 'amazon asin':
url = relation.target[0].text
match = AMAZON_ASIN_URL_REGEX.match(url)
if match is not None and 'asin' not in m:
m['asin'] = match.group(2)
if relation.type == 'license':
if relation.type == 'amazon asin' and 'asin' not in m:
amz = parse_amazon_url(relation.target[0].text)
if amz is not None:
m['asin'] = amz['asin']
elif relation.type == 'license':
url = relation.target[0].text
m.add('license', url)

View File

@@ -325,3 +325,14 @@ def load_release_type_scores(setting):
def save_release_type_scores(scores):
    """Serialise release type scores into a single space-separated string.

    Each (key, score) pair yielded by ``scores.iteritems()`` is rendered as
    ``"<key> <score>"`` with the score formatted to two decimal places, and
    the rendered pairs are joined with single spaces.
    """
    rendered = []
    for pair in scores.iteritems():
        rendered.append("%s %.2f" % pair)
    return " ".join(rendered)
# Amazon product URL pattern, compiled once at import time.
# - accepts both http and https
# - the dot after "www" is escaped so only a literal "www." prefix is
#   stripped from the host (e.g. "www2.amazon.com" is kept intact)
# - an optional ":port" is matched but not captured
# - the ASIN is 10 characters: a digit or "B" followed by nine of [0-9A-Z],
#   and must be followed by a non-alphanumeric character or end of string
_AMAZON_URL_REGEX = re.compile(
    r'^https?://(?:www\.)?(?P<host>.*?)(?::[0-9]+)?/.*/'
    r'(?P<asin>[0-9B][0-9A-Z]{9})(?:[^0-9A-Z]|$)'
)


def parse_amazon_url(url):
    """Extract host and ASIN from an Amazon URL.

    Returns a dict with the keys ``'host'`` (host name without a leading
    ``www.`` and without port) and ``'asin'`` when `url` matches, or None
    when it does not.
    """
    match = _AMAZON_URL_REGEX.match(url)
    if match is None:
        return None
    return match.groupdict()

39
test/test_amazon_urls.py Normal file
View File

@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
import unittest
from picard.util import parse_amazon_url
class ParseAmazonUrlTest(unittest.TestCase):
    """Tests for parse_amazon_url(): host/ASIN extraction and rejection."""

    def test_1(self):
        """A leading 'www.' is dropped from the reported host."""
        url = 'http://www.amazon.com/dp/020530902X'
        expected = {'asin': '020530902X', 'host': 'amazon.com'}
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_2(self):
        """A non-'www' subdomain is kept as part of the host."""
        url = 'http://ec1.amazon.co.jp/gp/product/020530902X'
        expected = {'asin': '020530902X', 'host': 'ec1.amazon.co.jp'}
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_3(self):
        """The ASIN is found in a long URL with extra path and query parts."""
        url = 'http://amazon.com/Dark-Side-Moon-Pink-Floyd/dp/B004ZN9RWK/ref=sr_1_1?s=music&ie=UTF8&qid=1372605047&sr=1-1&keywords=pink+floyd+dark+side+of+the+moon'
        expected = {'asin': 'B004ZN9RWK', 'host': 'amazon.com'}
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_4(self):
        """Incorrect ASIN: first character is neither a digit nor 'B'."""
        url = 'http://www.amazon.com/dp/A20530902X'
        expected = None
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_5(self):
        """Incorrect ASIN: contains a lowercase letter."""
        url = 'http://www.amazon.com/dp/020530902x'
        expected = None
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)