mirror of
https://github.com/fergalmoran/picard.git
synced 2026-02-22 23:55:46 +00:00
Create a function to parse amazon urls and reduce code redundancy.
A test for amazon url parsing was added.
This commit is contained in:
@@ -43,9 +43,6 @@ FPCALC_NAMES = ['fpcalc', 'pyfpcalc']
|
||||
# Various Artists MBID
|
||||
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'
|
||||
|
||||
# Amazon asin url
|
||||
AMAZON_ASIN_URL_REGEX = re.compile(r'^http://(?:www.)?(.*?)(?:\:[0-9]+)?/.*/([0-9B][0-9A-Z]{9})(?:[^0-9A-Z]|$)')
|
||||
|
||||
# Release formats
|
||||
RELEASE_FORMATS = {
|
||||
u'CD': N_('CD'),
|
||||
|
||||
@@ -26,9 +26,8 @@ import traceback
|
||||
import picard.webservice
|
||||
|
||||
from picard import config, log
|
||||
from picard.const import AMAZON_ASIN_URL_REGEX
|
||||
from picard.metadata import Metadata, is_front_image
|
||||
from picard.util import partial, mimetype
|
||||
from picard.util import partial, mimetype, parse_amazon_url
|
||||
from PyQt4.QtCore import QUrl, QObject
|
||||
|
||||
# data transliterated from the perl stuff used to find cover art for the
|
||||
@@ -282,17 +281,15 @@ def _process_url_relation(try_list, relation):
|
||||
return False
|
||||
|
||||
def _process_asin_relation(try_list, relation):
|
||||
match = AMAZON_ASIN_URL_REGEX.match(relation.target[0].text)
|
||||
if match is not None:
|
||||
asinHost = match.group(1)
|
||||
asin = match.group(2)
|
||||
if asinHost in AMAZON_SERVER:
|
||||
serverInfo = AMAZON_SERVER[asinHost]
|
||||
amz = parse_amazon_url(relation.target[0].text)
|
||||
if amz is not None:
|
||||
if amz['host'] in AMAZON_SERVER:
|
||||
serverInfo = AMAZON_SERVER[amz['host']]
|
||||
else:
|
||||
serverInfo = AMAZON_SERVER['amazon.com']
|
||||
host = serverInfo['server']
|
||||
path_l = AMAZON_IMAGE_PATH % (asin, serverInfo['id'], 'L')
|
||||
path_m = AMAZON_IMAGE_PATH % (asin, serverInfo['id'], 'M')
|
||||
path_l = AMAZON_IMAGE_PATH % (amz['asin'], serverInfo['id'], 'L')
|
||||
path_m = AMAZON_IMAGE_PATH % (amz['asin'], serverInfo['id'], 'M')
|
||||
_try_list_append_image_url(try_list, QUrl("http://%s:%s" % (host, path_l)))
|
||||
_try_list_append_image_url(try_list, QUrl("http://%s:%s" % (host, path_m)))
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
|
||||
import re
|
||||
from picard import config
|
||||
from picard.util import format_time, translate_from_sortname
|
||||
from picard.const import RELEASE_FORMATS, AMAZON_ASIN_URL_REGEX
|
||||
from picard.util import format_time, translate_from_sortname, parse_amazon_url
|
||||
from picard.const import RELEASE_FORMATS
|
||||
|
||||
|
||||
_artist_rel_types = {
|
||||
@@ -98,12 +98,11 @@ def _relations_to_metadata(relation_lists, m):
|
||||
work_to_metadata(relation.work[0], m)
|
||||
elif relation_list.target_type == 'url':
|
||||
for relation in relation_list.relation:
|
||||
if relation.type == 'amazon asin':
|
||||
url = relation.target[0].text
|
||||
match = AMAZON_ASIN_URL_REGEX.match(url)
|
||||
if match is not None and 'asin' not in m:
|
||||
m['asin'] = match.group(2)
|
||||
if relation.type == 'license':
|
||||
if relation.type == 'amazon asin' and 'asin' not in m:
|
||||
amz = parse_amazon_url(relation.target[0].text)
|
||||
if amz is not None:
|
||||
m['asin'] = amz['asin']
|
||||
elif relation.type == 'license':
|
||||
url = relation.target[0].text
|
||||
m.add('license', url)
|
||||
|
||||
|
||||
@@ -325,3 +325,14 @@ def load_release_type_scores(setting):
|
||||
|
||||
def save_release_type_scores(scores):
|
||||
return " ".join(["%s %.2f" % v for v in scores.iteritems()])
|
||||
|
||||
|
||||
# Pattern for Amazon product URLs, compiled once at import time.
#   host: the server name, without a leading "www." and without any ":port"
#   asin: 10 characters, the first a digit or "B", the rest digits or
#         uppercase letters, followed by a non-alphanumeric character or the
#         end of the string.
_AMAZON_URL_REGEX = re.compile(
    r'^https?://(?:www\.)?(?P<host>.*?)(?:\:[0-9]+)?/.*/'
    r'(?P<asin>[0-9B][0-9A-Z]{9})(?:[^0-9A-Z]|$)'
)


def parse_amazon_url(url):
    """Extract host and asin from an amazon url.

    Both ``http://`` and ``https://`` urls are accepted. A leading ``www.``
    and an optional ``:port`` suffix are not included in the returned host.

    It returns a dict with host and asin keys on success, None else
    """
    match = _AMAZON_URL_REGEX.match(url)
    if match is None:
        return None
    return match.groupdict()
|
||||
|
||||
39
test/test_amazon_urls.py
Normal file
39
test/test_amazon_urls.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import unittest
|
||||
from picard.util import parse_amazon_url
|
||||
|
||||
|
||||
class ParseAmazonUrlTest(unittest.TestCase):
    """Checks the host/asin extraction done by parse_amazon_url."""

    def test_1(self):
        # Short /dp/ url; the "www." prefix is not part of the host.
        url = 'http://www.amazon.com/dp/020530902X'
        expected = {'asin': '020530902X', 'host': 'amazon.com'}
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_2(self):
        # Non-"www" subdomain is kept in the host.
        url = 'http://ec1.amazon.co.jp/gp/product/020530902X'
        expected = {'asin': '020530902X', 'host': 'ec1.amazon.co.jp'}
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_3(self):
        # Long url with a title slug before the asin and a query string after.
        url = 'http://amazon.com/Dark-Side-Moon-Pink-Floyd/dp/B004ZN9RWK/ref=sr_1_1?s=music&ie=UTF8&qid=1372605047&sr=1-1&keywords=pink+floyd+dark+side+of+the+moon'
        expected = {'asin': 'B004ZN9RWK', 'host': 'amazon.com'}
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_4(self):
        # Incorrect ASIN: the first character is neither a digit nor "B".
        url = 'http://www.amazon.com/dp/A20530902X'
        expected = None
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)

    def test_5(self):
        # Incorrect ASIN: contains a lowercase letter.
        url = 'http://www.amazon.com/dp/020530902x'
        expected = None
        r = parse_amazon_url(url)
        self.assertEqual(r, expected)
|
||||
Reference in New Issue
Block a user