diff --git a/.all-contributorsrc b/.all-contributorsrc
index 2d37c9b..12fcc32 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -78,6 +78,15 @@
"review",
"ideas"
]
+ },
+ {
+ "login": "rettinghaus",
+ "name": "Klaus Rettinghaus",
+ "avatar_url": "https://avatars.githubusercontent.com/u/7693447?v=4",
+ "profile": "https://github.com/rettinghaus",
+ "contributions": [
+ "maintenance"
+ ]
}
],
"contributorsPerLine": 7,
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index 9a373b6..3d48c6f 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -15,14 +15,14 @@ jobs:
working-directory: .
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v5
- name: Set up Python 3.12
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: "3.12"
- name: Install uv
- uses: astral-sh/setup-uv@v5
+ uses: astral-sh/setup-uv@v6
with:
enable-cache: true
cache-dependency-glob: "pyproject.toml"
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index bdaab28..6e42013 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -21,9 +21,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v5
- name: Set up Python
- uses: actions/setup-python@v3
+ uses: actions/setup-python@v6
with:
python-version: '3.x'
- name: Install dependencies
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 3a05faf..fc93c1b 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -30,11 +30,11 @@ jobs:
working-directory: .
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v5
# use github python action instead of uv to take advantage of caching
- name: Set up Python ${{ matrix.python }}
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
cache: 'pip'
@@ -54,7 +54,7 @@ jobs:
if: ${{ matrix.python == env.COV_PYTHON_VERSION }}
- name: Upload coverage to Codecov
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
if: ${{ matrix.python == env.COV_PYTHON_VERSION }}
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 9180e0b..5e4e3c7 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -20,6 +20,7 @@ We use [All Contributors](https://allcontributors.org/) because we recognize tha
 Julia Damerow 💻 👀 ⚠️ 📋 🤔 |
 Malte Vogl 💻 👀 ⚠️ 📖 |
 Taylor Arnold 👀 🤔 |
+  Klaus Rettinghaus 🚧 |
diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst
index b93b81e..719d065 100644
--- a/docs/undate/converters.rst
+++ b/docs/undate/converters.rst
@@ -4,10 +4,20 @@ Converters
Overview
--------
+
+.. automodule:: undate.converters
+
+-----
+
.. automodule:: undate.converters.base
:members:
:undoc-members:
+
+.. autoclass:: undate.converters.combined.OmnibusDateConverter
+ :members:
+
+
Formats
--------
@@ -33,6 +43,8 @@ Extended Date-Time Format (EDTF)
Calendars
---------
+.. automodule:: undate.converters.calendars
+
Gregorian
^^^^^^^^^
diff --git a/src/undate/__init__.py b/src/undate/__init__.py
index 2da3434..44e9b04 100644
--- a/src/undate/__init__.py
+++ b/src/undate/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.5.2"
+__version__ = "0.6.0.dev0"
from undate.date import DatePrecision, UnDelta
from undate.undate import Undate, Calendar
diff --git a/src/undate/converters/__init__.py b/src/undate/converters/__init__.py
index e13532d..c13f2f1 100644
--- a/src/undate/converters/__init__.py
+++ b/src/undate/converters/__init__.py
@@ -1 +1,29 @@
-from undate.converters.base import BaseDateConverter as BaseDateConverter
+"""
+Converter classes add support for parsing and serializing dates
+in a variety of formats. A subset of these are calendar converters
+(:mod:`undate.converters.calendars`), which means they support both parsing
+and conversion from an alternate calendar to a common Gregorian
+calendar for comparison across dates.
+
+To parse a date with a supported converter, use the ``Undate`` class method
+:meth:`~undate.undate.Undate.parse` and specify the date as a string
+with the desired format or calendar, e.g.
+
+.. code-block::
+
+ Undate.parse("2001-05", "EDTF")
+ Undate.parse("7 Heshvan 5425", "Hebrew")
+
+For converters that support it, you can also serialize a date in a specified
+format with the ``Undate`` instance method :meth:`~undate.undate.Undate.format`:
+
+.. code-block::
+
+ Undate.parse("Rabīʿ ath-Thānī 343", "Islamic").format("EDTF")
+
+
+"""
+
+from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH
+
+__all__ = ["BaseDateConverter", "GRAMMAR_FILE_PATH"]
diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py
index 1cf1b6d..93a63a7 100644
--- a/src/undate/converters/base.py
+++ b/src/undate/converters/base.py
@@ -44,6 +44,7 @@
import importlib
import logging
+import pathlib
import pkgutil
from functools import cache
from typing import Dict, Type
@@ -53,6 +54,10 @@
logger = logging.getLogger(__name__)
+#: Path to parser grammar files
+GRAMMAR_FILE_PATH = pathlib.Path(__file__).parent / "grammars"
+
+
class BaseDateConverter:
"""Base class for parsing, formatting, and converting dates to handle
specific formats and different calendars."""
diff --git a/src/undate/converters/calendars/hebrew/parser.py b/src/undate/converters/calendars/hebrew/parser.py
index 5654f60..3056f85 100644
--- a/src/undate/converters/calendars/hebrew/parser.py
+++ b/src/undate/converters/calendars/hebrew/parser.py
@@ -1,8 +1,8 @@
-import pathlib
-
from lark import Lark
-grammar_path = pathlib.Path(__file__).parent / "hebrew.lark"
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "hebrew.lark"
with open(grammar_path) as grammar:
# NOTE: LALR parser is faster but can't be used to ambiguity between years and dates
diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py
index 8880434..1ca8c39 100644
--- a/src/undate/converters/calendars/hebrew/transformer.py
+++ b/src/undate/converters/calendars/hebrew/transformer.py
@@ -26,12 +26,13 @@ def hebrew_date(self, items):
# initialize and return an undate with year, month, day and
# configured calendar (hebrew by default)
+ # NOTE: use self.calendar so Seleucid can extend more easily
return Undate(**parts, calendar=self.calendar)
- # year translation is not needed since we want a tree with name year
- # this is equivalent to a no-op
- # def year(self, items):
- # return Tree(data="year", children=[items[0]])
+    def year(self, items):
+        # the year may arrive as several tokens; merge them into one string
+        year_text = "".join(map(str, items))
+        return Tree("year", [year_text])
def month(self, items):
# month has a nested tree for the rule and the value
diff --git a/src/undate/converters/calendars/islamic/parser.py b/src/undate/converters/calendars/islamic/parser.py
index b103711..61a0cf0 100644
--- a/src/undate/converters/calendars/islamic/parser.py
+++ b/src/undate/converters/calendars/islamic/parser.py
@@ -1,8 +1,8 @@
-import pathlib
-
from lark import Lark
-grammar_path = pathlib.Path(__file__).parent / "islamic.lark"
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "islamic.lark"
with open(grammar_path) as grammar:
# NOTE: LALR parser is faster but can't be used due to ambiguity between years and days
diff --git a/src/undate/converters/calendars/islamic/transformer.py b/src/undate/converters/calendars/islamic/transformer.py
index 9ffce36..7310d86 100644
--- a/src/undate/converters/calendars/islamic/transformer.py
+++ b/src/undate/converters/calendars/islamic/transformer.py
@@ -28,8 +28,17 @@ def islamic_date(self, items):
# year translation is not needed since we want a tree with name year
# this is equivalent to a no-op
- # def year(self, items):
- # return Tree(data="year", children=[items[0]])
+    def year(self, items):
+        # the combined parser can deliver the year as several tokens
+        # (including an anonymous one), so stitch them into one string
+        year_text = "".join(map(str, items))
+        return Tree("year", [year_text])
+
+    def day(self, items):
+        # the combined parser can deliver the day as several tokens
+        # (including an anonymous one), so stitch them into one string
+        day_text = "".join(map(str, items))
+        return Tree("day", [day_text])
def month(self, items):
# month has a nested tree for the rule and the value
diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py
new file mode 100644
index 0000000..54d66a5
--- /dev/null
+++ b/src/undate/converters/combined.py
@@ -0,0 +1,85 @@
+"""
+**Experimental** combined parser. Supports EDTF, Hebrew, and Hijri
+where dates are unambiguous. (Year-only dates are parsed as EDTF in
+Gregorian calendar.)
+"""
+
+from typing import Union
+
+from lark import Lark
+from lark.exceptions import UnexpectedCharacters, UnexpectedEOF
+from lark.visitors import Transformer, merge_transformers
+
+from undate import Undate, UndateInterval
+from undate.converters import BaseDateConverter, GRAMMAR_FILE_PATH
+from undate.converters.edtf.transformer import EDTFTransformer
+from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
+from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
+
+
+class CombinedDateTransformer(Transformer):
+    def start(self, children):
+        # top-level rule: hand back the list of children without altering it
+        return children
+
+
+# NOTE: currently year-only dates in combined parser are interpreted as EDTF
+# and use Gregorian calendar. In future, we could refine by adding calendar names
+# & abbreviations to the parser in order to recognize years from other calendars.
+
+# keyword names match the rule prefixes in combined.lark (e.g. hebrew__year)
+combined_transformer = merge_transformers(
+    CombinedDateTransformer(),
+    edtf=EDTFTransformer(),
+    hebrew=HebrewDateTransformer(),
+    islamic=IslamicDateTransformer(),
+)
+
+
+# open by filename so relative %import paths resolve from the grammar file's directory
+parser = Lark.open(
+    str(GRAMMAR_FILE_PATH / "combined.lark"), rel_to=__file__, strict=True
+)
+
+
+class OmnibusDateConverter(BaseDateConverter):
+    """
+    Combination parser that aggregates existing parser grammars.
+    Currently supports EDTF, Hebrew, and Hijri where dates are unambiguous.
+    (Year-only dates are parsed as EDTF in Gregorian calendar.)
+
+    Does not support serialization.
+
+    Example usage::
+
+        Undate.parse("Tammuz 4816", "omnibus")
+
+    """
+
+    #: converter name: omnibus
+    name: str = "omnibus"
+
+    def __init__(self):
+        self.transformer = combined_transformer
+
+    def parse(self, value: str) -> Union[Undate, UndateInterval]:
+        """
+        Parse a string in a supported format and return an :class:`~undate.undate.Undate`
+        or :class:`~undate.undate.UndateInterval`; raises :class:`ValueError` otherwise.
+        """
+        if not value:
+            raise ValueError("Parsing empty/unset string is not supported")
+
+        try:
+            parsetree = parser.parse(value)
+            # transform returns a list; we want the first item in the list
+            return self.transformer.transform(parsetree)[0]
+        except (UnexpectedCharacters, UnexpectedEOF) as err:
+            # input that ends before a complete date raises UnexpectedEOF
+            raise ValueError(
+                f"Parsing failed: '{value}' is not in a recognized date format"
+            ) from err
+
+    def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
+        """Not supported by this converter. Will raise :class:`ValueError`."""
+        raise ValueError("Omnibus converter does not support serialization")
diff --git a/src/undate/converters/edtf/parser.py b/src/undate/converters/edtf/parser.py
index 27c2bd6..bc8f0ef 100644
--- a/src/undate/converters/edtf/parser.py
+++ b/src/undate/converters/edtf/parser.py
@@ -1,8 +1,8 @@
-import pathlib
-
from lark import Lark
-grammar_path = pathlib.Path(__file__).parent / "edtf.lark"
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "edtf.lark"
with open(grammar_path) as grammar:
edtf_parser = Lark(grammar.read(), start="edtf")
diff --git a/src/undate/converters/edtf/transformer.py b/src/undate/converters/edtf/transformer.py
index 0b1de76..3167248 100644
--- a/src/undate/converters/edtf/transformer.py
+++ b/src/undate/converters/edtf/transformer.py
@@ -66,7 +66,10 @@ def day_unspecified(self, items):
def date_level1(self, items):
return self.date(items)
- # year (including negative years) use default transformation
+    def year(self, items):
+        # a year can mix digits and unknown-digit markers; merge into one string
+        year_text = "".join(self.get_values(items))
+        return Tree("year", [year_text])
def year_fivedigitsplus(self, items):
# strip off the leading Y and convert to integer
diff --git a/src/undate/converters/grammars/combined.lark b/src/undate/converters/grammars/combined.lark
new file mode 100644
index 0000000..0e77b5c
--- /dev/null
+++ b/src/undate/converters/grammars/combined.lark
@@ -0,0 +1,32 @@
+%import common.WS
+%ignore WS
+
+start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date )
+
+// Renaming of the import variables is required, as they receive the namespace of this file.
+// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
+
+// All grammars are in the same directory, so we can use relative imports
+
+// relative import from edtf.lark
+%import .edtf.edtf -> edtf__start
+
+// relative import from hebrew.lark
+%import .hebrew.hebrew_date -> hebrew__hebrew_date
+%import .hebrew.day -> hebrew__day
+%import .hebrew.month -> hebrew__month
+%import .hebrew.year -> hebrew__year
+
+// relative import from islamic.lark
+%import .islamic.islamic_date -> islamic__islamic_date
+%import .islamic.day -> islamic__day
+%import .islamic.month -> islamic__month
+%import .islamic.year -> islamic__year
+
+
+// override hebrew date to omit year-only, since year without calendar is ambiguous
+// NOTE: potentially support year with calendar label
+%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year
+
+// same for islamic date, year alone is ambiguous
+%override islamic__islamic_date: islamic__day islamic__month islamic__year | islamic__month islamic__year
diff --git a/src/undate/converters/edtf/edtf.lark b/src/undate/converters/grammars/edtf.lark
similarity index 100%
rename from src/undate/converters/edtf/edtf.lark
rename to src/undate/converters/grammars/edtf.lark
diff --git a/src/undate/converters/calendars/hebrew/hebrew.lark b/src/undate/converters/grammars/hebrew.lark
similarity index 85%
rename from src/undate/converters/calendars/hebrew/hebrew.lark
rename to src/undate/converters/grammars/hebrew.lark
index 6f4244c..118ed98 100644
--- a/src/undate/converters/calendars/hebrew/hebrew.lark
+++ b/src/undate/converters/grammars/hebrew.lark
@@ -11,23 +11,23 @@ hebrew_date: weekday? day month comma? year | month year | year
// PGP dates use qualifiers like "first decade of" (for beginning of month)
// "first third of", seasons (can look for more examples)
-// Hebrew calendar starts with year 1 in 3761 BCE
+// Hebrew calendar starts with year 1 in 3761 BCE
year: /\d+/
// months
month: month_1
| month_2
- | month_3
- | month_4
- | month_5
- | month_6
- | month_7
- | month_8
- | month_9
- | month_10
- | month_11
- | month_12
- | month_13
+ | month_3
+ | month_4
+ | month_5
+ | month_6
+ | month_7
+ | month_8
+ | month_9
+ | month_10
+ | month_11
+ | month_12
+ | month_13
// months have 29 or 30 days; we do not expect leading zeroes
day: /[1-9]/ | /[12][0-9]/ | /30/
diff --git a/src/undate/converters/calendars/islamic/islamic.lark b/src/undate/converters/grammars/islamic.lark
similarity index 100%
rename from src/undate/converters/calendars/islamic/islamic.lark
rename to src/undate/converters/grammars/islamic.lark
diff --git a/tests/test_converters/test_combined_parser.py b/tests/test_converters/test_combined_parser.py
new file mode 100644
index 0000000..717a16e
--- /dev/null
+++ b/tests/test_converters/test_combined_parser.py
@@ -0,0 +1,54 @@
+import pytest
+
+from undate.converters.combined import parser, combined_transformer
+
+from undate import Undate, UndateInterval
+
+# test that valid dates can be parsed
+
+testcases = [
+    # EDTF (Gregorian), including unknown digits and an interval
+    ("1984", Undate(1984)),
+    ("201X", Undate("201X")),
+    ("20XX", Undate("20XX")),
+    ("2004-XX", Undate(2004, "XX")),
+    ("1000/2000", UndateInterval(Undate(1000), Undate(2000))),
+    # Hebrew / Anno Mundi calendar (month + year)
+    ("Tammuz 4816", Undate(4816, 4, calendar="Hebrew")),
+    # Islamic / Hijri calendar (month + year, and day + month + year)
+    ("Jumādā I 1243", Undate(1243, 5, calendar="Islamic")),
+    ("7 Jumādā I 1243", Undate(1243, 5, 7, calendar="Islamic")),
+    ("14 Rabīʿ I 901", Undate(901, 3, 14, calendar="Islamic")),
+]
+
+
+@pytest.mark.parametrize("date_string,expected", testcases)
+def test_transform(date_string, expected):
+    # exercise the parser and merged transformer directly, without
+    # going through the converter class
+    tree = parser.parse(date_string)
+    # the transformer returns a list; the parsed date is its first item
+    result = combined_transformer.transform(tree)[0]
+    # since the same unknown date is not considered strictly equal,
+    # compare object representations
+    assert repr(result) == repr(expected)
+
+
+@pytest.mark.parametrize("date_string,expected", testcases)
+def test_converter(date_string, expected):
+    parsed = Undate.parse(date_string, "omnibus")  # via the converter class
+    assert repr(parsed) == repr(expected)
+
+
+def test_parse_errors():
+    # empty string not supported
+    with pytest.raises(ValueError, match="not supported"):
+        Undate.parse("", "omnibus")
+    # text that is not a date in any of the combined grammars is rejected
+    with pytest.raises(ValueError, match="not in a recognized date format"):
+        Undate.parse("Monday 2023", "omnibus")
+
+
+def test_no_serialize():
+    with pytest.raises(ValueError, match="does not support"):
+        Undate("2022").format("omnibus")  # omnibus converter is parse-only