Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not parse dates as prices. Sort imports. #19

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
28 changes: 23 additions & 5 deletions price_parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# -*- coding: utf-8 -*-

import re
import string
from typing import Callable, Optional, Pattern, List, Tuple
from datetime import datetime
from decimal import Decimal, InvalidOperation
from typing import Callable, List, Optional, Pattern, Tuple

import attr

from ._currencies import (CURRENCY_CODES, CURRENCY_NATIONAL_SYMBOLS,
CURRENCY_SYMBOLS)

Expand Down Expand Up @@ -69,7 +72,7 @@ def or_regex(symbols: List[str]) -> Pattern:

# unique currency symbols
'$', '€', '£', 'zł', 'Zł', 'Kč', '₽', '¥', '¥',
'฿', 'դր.', 'դր', '₦', '₴', '₱', '৳', '₭', '₪', '﷼', '៛', '₩', '₫', '₡',
'฿', 'դր.', 'դր', '₦', '₴', '₱', '৳', '₭', '₪', '﷼', '៛', '₩', '₫', '₡',
'টকা', 'ƒ', '₲', '؋', '₮', 'नेरू', '₨',
'₶', '₾', '֏', 'ރ', '৲', '૱', '௹', '₠', '₢', '₣', '₤', '₧', '₯',
'₰', '₳', '₷', '₸', '₹', '₺', '₼', '₾', '₿', 'ℳ',
Expand All @@ -82,7 +85,7 @@ def or_regex(symbols: List[str]) -> Pattern:

# other common symbols, which we consider unambiguous
'EUR', 'euro', 'eur', 'CHF', 'DKK', 'Rp', 'lei',
'руб.', 'руб', 'грн.', 'грн', 'дин.', 'Dinara', 'динар', 'лв.', 'лв',
'руб.', 'руб', 'грн.', 'грн', 'дин.', 'Dinara', 'динар', 'лв.', 'лв',
'р.', 'тңг', 'тңг.', 'ман.',
]

Expand Down Expand Up @@ -139,8 +142,8 @@ def extract_currency_symbol(price: Optional[str],
if price and '$' in price:
methods.insert(0, (_search_dollar_code, price))

for meth, attr in methods:
m = meth(attr) if attr else None
for meth, attrib in methods:
m = meth(attrib) if attrib else None
if m:
return m.group(0)

Expand Down Expand Up @@ -180,6 +183,10 @@ def extract_price_text(price: str) -> Optional[str]:
>>> extract_price_text("50")
'50'
"""

if date_format(price):
bulatbulat48 marked this conversation as resolved.
Show resolved Hide resolved
return None

if price.count('€') == 1:
m = re.search(r"""
[\d\s.,]*?\d # number, probably with thousand separators
Expand Down Expand Up @@ -283,3 +290,14 @@ def parse_number(num: str) -> Optional[Decimal]:
return Decimal(num)
except InvalidOperation:
return None


def date_format(price: object) -> Optional[datetime]:
    """Return *price* parsed as a date, or ``None`` if it is not a date.

    The whole string must match one of these layouts:

    * ``%d.%m.%Y`` -- e.g. ``'10.04.2004'``
    * ``%B, %Y``   -- full month name, e.g. ``'July, 2004'``
    * ``%b, %Y``   -- abbreviated month name, e.g. ``'Jul, 2004'``

    Month names are matched by ``datetime.strptime``, which uses the
    current locale's names.

    :param price: candidate text; any non-string value yields ``None``.
    :return: the parsed ``datetime`` for the first matching layout,
        otherwise ``None``.

    >>> date_format('10.04.2004')
    datetime.datetime(2004, 4, 10, 0, 0)
    >>> date_format('2004') is None
    True
    """
    # Reject non-strings up front rather than relying on strptime raising
    # TypeError for them.
    if not isinstance(price, str):
        return None

    for fmt in ('%d.%m.%Y', '%B, %Y', '%b, %Y'):
        try:
            # strptime either returns a datetime or raises ValueError, so
            # no further type check of the result is needed.
            return datetime.strptime(price, fmt)
        except ValueError:
            continue
    return None
20 changes: 19 additions & 1 deletion tests/test_price_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
we've found in the wild; PRICE_PARSING_EXAMPLES_NEW is a list of tests for
new features. New tests should probably go into these two lists.
"""
from typing import Optional, Union
from datetime import datetime
from decimal import Decimal
from typing import Optional, Union

import pytest

from price_parser import Price
from price_parser.parser import date_format


class Example(Price):
Expand Down Expand Up @@ -1986,3 +1988,19 @@ def test_parsing(example: Example):
)
def test_price_amount_float(amount, amount_float):
assert Price(amount, None, None).amount_float == amount_float


@pytest.mark.parametrize(
    "price, result",
    [
        # strings written in one of the layouts date_format is expected
        # to recognise
        ('10.04.2004', datetime(2004, 4, 10, 0, 0)),
        ('July, 2004', datetime(2004, 7, 1, 0, 0)),
        ('Jul, 2004', datetime(2004, 7, 1, 0, 0)),
        # plain numeric strings are expected not to be treated as dates
        ('200', None),
        ('2004', None),
        # non-string inputs are expected to give None
        (2004, None),
        (10.2014, None),
    ]
)
def test_date_format(price, result):
    """Check that date_format returns the expected datetime or None."""
    parsed = date_format(price)
    assert parsed == result
bulatbulat48 marked this conversation as resolved.
Show resolved Hide resolved