diff --git a/picard/const.py b/picard/const.py index 8006b88f1..399a32188 100644 --- a/picard/const.py +++ b/picard/const.py @@ -43,9 +43,6 @@ FPCALC_NAMES = ['fpcalc', 'pyfpcalc'] # Various Artists MBID VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' -# Amazon asin url -AMAZON_ASIN_URL_REGEX = re.compile(r'^http://(?:www.)?(.*?)(?:\:[0-9]+)?/.*/([0-9B][0-9A-Z]{9})(?:[^0-9A-Z]|$)') - # Release formats RELEASE_FORMATS = { u'CD': N_('CD'), diff --git a/picard/coverart.py b/picard/coverart.py index 1a571ace4..c2c37001a 100644 --- a/picard/coverart.py +++ b/picard/coverart.py @@ -26,9 +26,8 @@ import traceback import picard.webservice from picard import config, log -from picard.const import AMAZON_ASIN_URL_REGEX from picard.metadata import Metadata, is_front_image -from picard.util import partial, mimetype +from picard.util import partial, mimetype, parse_amazon_url from PyQt4.QtCore import QUrl, QObject # data transliterated from the perl stuff used to find cover art for the @@ -282,17 +281,15 @@ def _process_url_relation(try_list, relation): return False def _process_asin_relation(try_list, relation): - match = AMAZON_ASIN_URL_REGEX.match(relation.target[0].text) - if match is not None: - asinHost = match.group(1) - asin = match.group(2) - if asinHost in AMAZON_SERVER: - serverInfo = AMAZON_SERVER[asinHost] + amz = parse_amazon_url(relation.target[0].text) + if amz is not None: + if amz['host'] in AMAZON_SERVER: + serverInfo = AMAZON_SERVER[amz['host']] else: serverInfo = AMAZON_SERVER['amazon.com'] host = serverInfo['server'] - path_l = AMAZON_IMAGE_PATH % (asin, serverInfo['id'], 'L') - path_m = AMAZON_IMAGE_PATH % (asin, serverInfo['id'], 'M') + path_l = AMAZON_IMAGE_PATH % (amz['asin'], serverInfo['id'], 'L') + path_m = AMAZON_IMAGE_PATH % (amz['asin'], serverInfo['id'], 'M') _try_list_append_image_url(try_list, QUrl("http://%s:%s" % (host, path_l))) _try_list_append_image_url(try_list, QUrl("http://%s:%s" % (host, path_m))) diff --git 
a/picard/mbxml.py b/picard/mbxml.py index bf3405ae5..bb08b91c2 100644 --- a/picard/mbxml.py +++ b/picard/mbxml.py @@ -19,8 +19,8 @@ import re from picard import config -from picard.util import format_time, translate_from_sortname -from picard.const import RELEASE_FORMATS, AMAZON_ASIN_URL_REGEX +from picard.util import format_time, translate_from_sortname, parse_amazon_url +from picard.const import RELEASE_FORMATS _artist_rel_types = { @@ -98,12 +98,11 @@ def _relations_to_metadata(relation_lists, m): work_to_metadata(relation.work[0], m) elif relation_list.target_type == 'url': for relation in relation_list.relation: - if relation.type == 'amazon asin': - url = relation.target[0].text - match = AMAZON_ASIN_URL_REGEX.match(url) - if match is not None and 'asin' not in m: - m['asin'] = match.group(2) - if relation.type == 'license': + if relation.type == 'amazon asin' and 'asin' not in m: + amz = parse_amazon_url(relation.target[0].text) + if amz is not None: + m['asin'] = amz['asin'] + elif relation.type == 'license': url = relation.target[0].text m.add('license', url) diff --git a/picard/util/__init__.py b/picard/util/__init__.py index 19495e4f5..8851e0113 100644 --- a/picard/util/__init__.py +++ b/picard/util/__init__.py @@ -325,3 +325,14 @@ def load_release_type_scores(setting): def save_release_type_scores(scores): return " ".join(["%s %.2f" % v for v in scores.iteritems()]) + + +def parse_amazon_url(url): + """Extract host and asin from an amazon url. 
+ It returns a dict with host and asin keys on success, None else + """ + r = re.compile(r'^http://(?:www.)?(?P<host>.*?)(?:\:[0-9]+)?/.*/(?P<asin>[0-9B][0-9A-Z]{9})(?:[^0-9A-Z]|$)') + match = r.match(url) + if match is not None: + return match.groupdict() + return None diff --git a/test/test_amazon_urls.py b/test/test_amazon_urls.py new file mode 100644 index 000000000..de19306b6 --- /dev/null +++ b/test/test_amazon_urls.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +import unittest +from picard.util import parse_amazon_url + + +class ParseAmazonUrlTest(unittest.TestCase): + + def test_1(self): + url = 'http://www.amazon.com/dp/020530902X' + expected = {'asin': '020530902X', 'host': 'amazon.com'} + r = parse_amazon_url(url) + self.failUnlessEqual(r, expected) + + def test_2(self): + url = 'http://ec1.amazon.co.jp/gp/product/020530902X' + expected = {'asin': '020530902X', 'host': 'ec1.amazon.co.jp'} + r = parse_amazon_url(url) + self.failUnlessEqual(r, expected) + + def test_3(self): + url = 'http://amazon.com/Dark-Side-Moon-Pink-Floyd/dp/B004ZN9RWK/ref=sr_1_1?s=music&ie=UTF8&qid=1372605047&sr=1-1&keywords=pink+floyd+dark+side+of+the+moon' + expected = {'asin': 'B004ZN9RWK', 'host': 'amazon.com'} + r = parse_amazon_url(url) + self.failUnlessEqual(r, expected) + + def test_4(self): + #incorrect ASIN + url = 'http://www.amazon.com/dp/A20530902X' + expected = None + r = parse_amazon_url(url) + self.failUnlessEqual(r, expected) + + def test_5(self): + #incorrect ASIN + url = 'http://www.amazon.com/dp/020530902x' + expected = None + r = parse_amazon_url(url) + self.failUnlessEqual(r, expected)