From 86b94517f1df7d705564d5a93e0d0c431ccd9120 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 13 Apr 2021 18:52:28 +0200 Subject: [PATCH 001/135] Enable packratting for pyparser Delivers significant performance improvements by caching previously computed results. --- edtf/parser/grammar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index c028c6e..d612c5f 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -1,5 +1,9 @@ from pyparsing import Literal as L, ParseException, Optional, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums + ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, ParserElement + +# From the pyparsing performance improvement tips: +# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips +ParserElement.enablePackrat() # (* ************************** Level 0 *************************** *) from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ From 7fdf8dd8b649a5085d8f2aed3b66a8734f2ce915 Mon Sep 17 00:00:00 2001 From: jacobcolyvan Date: Mon, 26 Jul 2021 12:29:25 +1000 Subject: [PATCH 002/135] #37 update for Django 3.x compat --- edtf/fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/fields.py b/edtf/fields.py index 83d10a7..52b9171 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -53,7 +53,7 @@ def deconstruct(self): del kwargs["max_length"] return name, path, args, kwargs - def from_db_value(self, value, expression, connection, context): + def from_db_value(self, value, expression, connection, context=None): # Converting values to Python objects if not value: return None From 6e4a627df5447b76db492b1603f95bbd55524346 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Fri, 26 Apr 2024 15:43:38 +0200 Subject: [PATCH 003/135] Minor updates --- edtf/natlang/en.py | 3 ++- poetry.lock | 45 +++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 18 
++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 poetry.lock create mode 100644 pyproject.toml diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index ec7842b..5263e07 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -89,6 +89,7 @@ def text_to_edtf(text): is_before = re.findall(r'\bbefore\b', t) is_before = is_before or re.findall(r'\bearlier\b', t) + is_before = is_before or re.findall(r'\baprés\b', t) is_after = re.findall(r'\bafter\b', t) is_after = is_after or re.findall(r'\bsince\b', t) @@ -133,7 +134,7 @@ def text_to_edtf_date(text): is_approximate = is_approximate or re.findall(r'\bcirca\b', t) # the word 'approx'/'around'/'about' anywhere is_approximate = is_approximate or \ - re.findall(r'\b(approx|around|about)', t) + re.findall(r'\b(approx|approximately|around|about)', t) # a ~ before a year-ish number is_approximate = is_approximate or re.findall(r'\b~\d{4}', t) # a ~ at the beginning diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..745843e --- /dev/null +++ b/poetry.lock @@ -0,0 +1,45 @@ +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
+ +[[package]] +name = "pyparsing" +version = "3.1.2" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, + {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "822c6f7ddf2552d097c1bfc8399a2492c845c74cb4576a423adf3ad62850ffc3" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f203360 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[tool.poetry] +name = "python-edtf" +version = "0.1.0" +description = "" +authors = ["Andrew Hankinson "] +readme = "README.md" +packages = [{include = "python_edtf"}] + +[tool.poetry.dependencies] +python = "^3.11" 
+python-dateutil = "^2.9.0.post0" +pyparsing = "^3.1.2" +six = "^1.16.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" From 80fdd60cbb590d7139341293185628d6aa8cac5b Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Fri, 26 Apr 2024 15:49:58 +0200 Subject: [PATCH 004/135] Update dependency management --- pyproject.toml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f203360..f1d7c5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "python-edtf" +name = "edtf" version = "0.1.0" description = "" authors = ["Andrew Hankinson "] diff --git a/setup.py b/setup.py index f0f1849..f2cc7d5 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,6 @@ from __future__ import print_function import setuptools -import sys def readme(): with open('README.md') as f: From c12d759732d393ac66faa462b8d61b057c675d17 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Fri, 26 Apr 2024 15:55:52 +0200 Subject: [PATCH 005/135] Deps --- poetry.lock | 4 ++-- pyproject.toml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 745843e..c4b40b6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -41,5 +41,5 @@ files = [ [metadata] lock-version = "2.0" -python-versions = "^3.11" -content-hash = "822c6f7ddf2552d097c1bfc8399a2492c845c74cb4576a423adf3ad62850ffc3" +python-versions = "^3.9" +content-hash = "e6be32f86f1a6af0695f6846b57ed289e015b5634c7f574c45800095a84e2200" diff --git a/pyproject.toml b/pyproject.toml index f1d7c5f..9af9ee4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [tool.poetry] name = "edtf" -version = "0.1.0" +version = "4.0.1+enh" description = "" authors = ["Andrew Hankinson "] readme = "README.md" -packages = [{include = "python_edtf"}] +packages = [{include = "edtf"}] [tool.poetry.dependencies] -python = "^3.11" +python = "^3.9" python-dateutil = "^2.9.0.post0" 
pyparsing = "^3.1.2" six = "^1.16.0" From a58ee106780f8a7d96dc4926038d189eb563a884 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Wed, 8 May 2024 17:24:53 +1000 Subject: [PATCH 006/135] Add dummy GH Action to activate Actions for the repository --- .github/workflows/ci.yml | 34 ++++++++++++++++++++++++++++++++++ .github/workflows/dummy.yml | 8 ++++++++ 2 files changed, 42 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/dummy.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..34cbabc --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +name: CI + +on: + workflow_dispatch: + pull_request: + +jobs: + python-unit: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + defaults: + run: + working-directory: . + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[test] + + - name: Run unit tests + run: | + pytest diff --git a/.github/workflows/dummy.yml b/.github/workflows/dummy.yml new file mode 100644 index 0000000..af2aeba --- /dev/null +++ b/.github/workflows/dummy.yml @@ -0,0 +1,8 @@ +name: dummy-github-action + +on: [push] +jobs: + print-message: + runs-on: ubuntu-latest + steps: + - run: echo "Dummy Action to initialise Actions for the repository" From ee4b21e00f884ee8adea122128e89018da8d6491 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 18:39:28 -0400 Subject: [PATCH 007/135] Update the natural language parser to work with 2018 spec In the parser: - Update regular expressions for SHORT_YEAR_RE and LONG_YEAR_RE to use X instead of x and u and Y 
instead of y - Replaced`unknown` with null as per the 2018 spec. It does not look like python-edtf currently has open intervals (`open` before, `..` now)? - Replaced `?~` with `%` In the tests: - eliminate masked precision - no u/x just X for unknown regardless of why the data is missing - replace unknown with null - replace ~? with % --- edtf/natlang/en.py | 34 +++++----- edtf/natlang/tests.py | 143 +++++++++++++++++++----------------------- 2 files changed, 84 insertions(+), 93 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index ff83034..213d17f 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -12,8 +12,8 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r'(-?)([\du])([\dxu])([\dxu])([\dxu])' -LONG_YEAR_RE = r'y(-?)([1-9]\d\d\d\d+)' +SHORT_YEAR_RE = r'(-?)([\dX])([\dX])([\dX])([\dX])' +LONG_YEAR_RE = r'Y(-?)([1-9]\d\d\d\d+)' CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?' CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)' @@ -29,7 +29,7 @@ def text_to_edtf(text): Generate EDTF string equivalent of a given natural language date string. """ if not text: - return + return None t = text.lower() @@ -95,9 +95,9 @@ def text_to_edtf(text): is_after = is_after or re.findall(r'\blater\b', t) if is_before: - result = u"unknown/%s" % result + result = f"/{result}" # unknown is replaced with null for intervals elif is_after: - result = u"%s/unknown" % result + result = f"{result}/" # unknown is replaced with null for intervals return result @@ -151,7 +151,7 @@ def text_to_edtf_date(text): # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dxx" % (int(is_century[0][0]) - 1,) + result = "%02dXX" % (int(is_century[0][0]) - 1,) is_approximate = is_approximate or \ re.findall(r'\b(ca?\.?) ?' + CENTURY_RE, t) is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t) @@ -222,12 +222,12 @@ def text_to_edtf_date(text): # a century or a decade. 
if i == 2 and could_be_century and \ not (is_approximate or is_uncertain): - result += 'x' + result += 'X' elif i == 3 and is_decade > 0: if mentions_year: - result += 'u' # year precision + result += 'X' # year precision else: - result += 'x' # decade precision + result += 'X' # decade precision elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default @@ -235,12 +235,12 @@ def text_to_edtf_date(text): else: # different values were produced, meaning that it's likely # a default. Use 'unspecified' - result += "u" + result += 'X' # strip off unknown chars from end of string - except the first 4 for i in reversed(xrange(len(result))): - if result[i] not in ('u', 'x', '-'): + if result[i] not in ('X', '-'): smallest_length = 4 if mentions_month: @@ -264,11 +264,13 @@ def text_to_edtf_date(text): # end dateutil post-parsing - if is_uncertain: - result += "?" - - if is_approximate: - result += "~" + if is_uncertain and is_approximate: + result += "%" + else: + if is_uncertain: + result += "?" + if is_approximate: + result += "~" # weed out bad parses if result.startswith("uu-uu"): diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index eaa9af6..5bfb052 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -6,8 +6,8 @@ # where examples are tuples, the second item is the normalised output @pytest.mark.parametrize("input_text,expected_output", [ # Ignoring 'late' for simplicity in these examples - ('active late 17th-19th centuries', '16xx/18xx'), - ('active 17-19th Centuries', '16xx/18xx'), + ('active late 17th-19th centuries', '16XX/18XX'), + ('active 17-19th Centuries', '16XX/18XX'), # Unrecognised values ('', None), @@ -52,26 +52,26 @@ ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word ('attic. 1802', '1802'), # Avoid false positive 'circa' - # Masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. 
+ # # Masked precision + # ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - # Masked precision + uncertainty - ('ca. 1860s', '186x~'), - ('c. 1860s', '186x~'), - ('Circa 1840s', '184x~'), - ('circa 1840s', '184x~'), - ('ca. 1860s?', '186x?~'), - ('uncertain: approx 1862', '1862?~'), + # # Masked precision + uncertainty + # ('ca. 1860s', '186x~'), + # ('c. 1860s', '186x~'), + # ('Circa 1840s', '184x~'), + # ('circa 1840s', '184x~'), + # ('ca. 1860s?', '186x?~'), + # ('uncertain: approx 1862', '1862?~'), - # Ambiguous masked precision for centuries and decades - ('1800s', '18xx'), # Without additional uncertainty, use the century - ('2000s', '20xx'), # Without additional uncertainty, use the century - ('c1900s', '190x~'), # If there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade + # # Ambiguous masked precision for centuries and decades + ('1800s', '18XX'), # Without additional uncertainty, use the century + ('2000s', '20XX'), # Without additional uncertainty, use the century + ('c1900s', '190X~'), # If there's additional uncertainty, use the decade + ('c1800s?', '180X%'), # If there's additional uncertainty, use the decade # Unspecified dates - ('January 12', 'uuuu-01-12'), - ('January', 'uuuu-01'), + ('January 12', 'XXXX-01-12'), + ('January', 'XXXX-01'), ('10/7/2008', '2008-10-07'), ('7/2008', '2008-07'), @@ -83,49 +83,50 @@ ('Winter 1872', '1872-24'), # Dates relative to known events (before/after) - ('earlier than 1928', 'unknown/1928'), - ('before 1928', 'unknown/1928'), - ('after 1928', '1928/unknown'), - ('later than 1928', '1928/unknown'), - ('before January 1928', 'unknown/1928-01'), - ('before 18 January 1928', 'unknown/1928-01-18'), + ('earlier than 1928', '/1928'), + ('before 1928', '/1928'), + ('after 1928', '1928/'), + ('later than 1928', '1928/'), + ('before January 1928', '/1928-01'), + ('before 18 January 1928', '/1928-01-18'), # Approximations combined 
with before/after - ('before approx January 18 1928', 'unknown/1928-01-18~'), - ('before approx January 1928', 'unknown/1928-01~'), - ('after approx January 1928', '1928-01~/unknown'), - ('after approx Summer 1928', '1928-22~/unknown'), + ('before approx January 18 1928', '/1928-01-18~'), + ('before approx January 1928', '/1928-01~'), + ('after approx January 1928', '1928-01~/'), + ('after approx Summer 1928', '1928-22~/'), # Before and after with uncertain / unspecified components - ('after about the 1920s', '192x~/unknown'), - ('before about the 1900s', 'unknown/190x~'), - ('before the 1900s', 'unknown/19xx'), - - # Specifying unspecified components within a date - # ('decade in 1800s', '18ux'), #too esoteric - # ('decade somewhere during the 1800s', '18ux'), #lengthier. Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), # 186x has decade precision - ('year in the 1800s', '18xu'), # 186u has year precision - ('year in about the 1800s', '180u~'), - ('month in 1872', '1872-uu'), - ('day in Spring 1849', '1849-21-uu'), - ('day in January 1872', '1872-01-uu'), - ('day in 1872', '1872-uu-uu'), + ('after about the 1920s', '192X~/'), + ('before about the 1900s', '/190X~'), + ('before the 1900s', '/19XX'), + + # previous examples for masked precision, now removed from the EDTF spec + # use `X` for unknown regardless of precision or why the data is unknown + ('decade in 1800s', '18XX'), + ('decade somewhere during the 1800s', '18XX'), + ('year in the 1860s', '186X'), + ('year in the 1800s', '18XX'), + ('year in about the 1800s', '180X~'), + ('month in 1872', '1872-XX'), + ('day in Spring 1849', '1849-21-XX'), + ('day in January 1872', '1872-01-XX'), + ('day in 1872', '1872-XX-XX'), ('birthday in 1872', '1872'), # Handling centuries with approximation and uncertainty - ('1st century', '00xx'), - ('10c', '09xx'), - ('19th century', '18xx'), - ('19th century?', '18xx?'), - ('before 19th century', 'unknown/18xx'), - ('19c', '18xx'), - ('15c.', '14xx'), - ('ca. 
19c', '18xx~'), - ('~19c', '18xx~'), - ('about 19c', '18xx~'), - ('19c?', '18xx?'), - ('c.19c?', '18xx?~'), + ('1st century', '00XX'), + ('10c', '09XX'), + ('19th century', '18XX'), + ('19th century?', '18XX?'), + ('before 19th century', '/18XX'), + ('19c', '18XX'), + ('15c.', '14XX'), + ('ca. 19c', '18XX~'), + ('~19c', '18XX~'), + ('about 19c', '18XX~'), + ('19c?', '18XX?'), + ('c.19c?', '18XX%'), # BC/AD dating ('1 AD', '0001'), @@ -137,13 +138,13 @@ ('c127 CE', '0127~'), ('c1270 CE', '1270~'), ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 - ('2nd century bce', '-01xx'), - ('2nd century ad', '01xx'), - ('2nd century ce', '01xx'), + ('2nd century bc', '-01XX'), # -200 to -101 + ('2nd century bce', '-01XX'), + ('2nd century ad', '01XX'), + ('2nd century ce', '01XX'), # Combining uncertainties and approximations in creative ways - ('a day in about Spring 1849?', '1849-21-uu?~'), + ('a day in about Spring 1849?', '1849-21-XX%'), # Simple date ranges, showcasing both the limitations and capabilities of the parser # Not all of these results are correct EDTF, but this is as good as the EDTF implementation @@ -153,9 +154,9 @@ ('1851-52', '1851/1852'), ('1852 - 1860', '1852/1860'), ('1856-ca. 1865', '1856/1865~'), - ('1857-mid 1860s', '1857/186x'), + ('1857-mid 1860s', '1857/186X'), ('1858/1860', '[1858, 1860]'), - ('1860s-1870s', '186x/187x'), + ('1860s-1870s', '186X/187X'), ('1910-30', '1910/1930'), ('active 1910-30', '1910/1930'), ('1861-67', '1861/1867'), @@ -168,28 +169,16 @@ ('1864-1872, printed 1870s', '1864/1872'), ('1868-1871?', '1868/1871?'), ('1869-70', '1869/1870'), - ('1870s, printed ca. 1880s', '187x'), + ('1870s, printed ca. 
1880s', '187X'), ('1900-1903, cast before 1929', '1900/1903'), ('1900; 1973', '1900'), ('1900; printed 1912', '1900'), ('1915 late - autumn 1916', '1915/1916-23'), ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} - ('1920s -early 1930s', '192x/193x'), - ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, + ('1920s -early 1930s', '192X/193X'), + ('1930s, printed early 1960s', '193X'), # should be something like {193x, 196x}, ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} - - # Uncertain and approximate on different parts of the date - # for these to work we need to recast is_uncertain and is_approximate - # such that they work on different parts. Probably worth rolling our own - # dateparser at this point. - # ('July in about 1849', '1849~-07'), - # ('a day in July in about 1849', '1849~-07-uu'), - # ('a day in Spring in about 1849', '1849~-21-uu'), - # ('a day in about July? in about 1849', '1849~-07?~-uu'), - # ('a day in about Spring in about 1849', '1849~-21~-uu'), - # ('maybe January in some year in about the 1830s', '183u~-01?'), - # ('about July? in about 1849', '1849~-07?~'), ]) def test_natlang(input_text, expected_output): @@ -198,5 +187,5 @@ def test_natlang(input_text, expected_output): Verify that the conversion from text to EDTF format matches the expected output. 
""" result = text_to_edtf(input_text) - assert result == expected_output, f"Failed for input: {input_text}" + assert result == expected_output, f"Failed for input: {input_text} - expected {expected_output}, got {result}" From 4fd07820a71defb8d4a6e61434de13b178bcbdd3 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Sun, 12 May 2024 19:05:31 -0400 Subject: [PATCH 008/135] More updates of tests and English parser --- edtf/natlang/en.py | 8 ++++---- edtf/natlang/tests.py | 28 ++++++++++++---------------- edtf/parser/tests.py | 32 +++++++++++++++----------------- 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 213d17f..1f46c37 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -225,16 +225,16 @@ def text_to_edtf_date(text): result += 'X' elif i == 3 and is_decade > 0: if mentions_year: - result += 'X' # year precision + result += 'X' # previously year precision - now just X else: - result += 'X' # decade precision + result += 'X' # previously decade precision - now just X elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default result += date1[i] else: # different values were produced, meaning that it's likely - # a default. Use 'unspecified' + # a default. Use 'X' result += 'X' # strip off unknown chars from end of string - except the first 4 @@ -273,7 +273,7 @@ def text_to_edtf_date(text): result += "~" # weed out bad parses - if result.startswith("uu-uu"): + if result.startswith("XX-XX"): return None return result diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 5bfb052..911fc13 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -52,22 +52,18 @@ ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word ('attic. 
1802', '1802'), # Avoid false positive 'circa' - # # Masked precision - # ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - - # # Masked precision + uncertainty - # ('ca. 1860s', '186x~'), - # ('c. 1860s', '186x~'), - # ('Circa 1840s', '184x~'), - # ('circa 1840s', '184x~'), - # ('ca. 1860s?', '186x?~'), - # ('uncertain: approx 1862', '1862?~'), - - # # Ambiguous masked precision for centuries and decades - ('1800s', '18XX'), # Without additional uncertainty, use the century - ('2000s', '20XX'), # Without additional uncertainty, use the century - ('c1900s', '190X~'), # If there's additional uncertainty, use the decade - ('c1800s?', '180X%'), # If there's additional uncertainty, use the decade + # Previously tested masked precision, uncertain or ambiguous masked precision + ('1860s', '186X'), + ('ca. 1860s', '186X~'), + ('c. 1860s', '186X~'), + ('Circa 1840s', '184X~'), + ('circa 1840s', '184X~'), + ('ca. 1860s?', '186X%'), + ('uncertain: approx 1862', '1862%'), + ('1800s', '18XX'), + ('2000s', '20XX'), + ('c1900s', '190X~'), + ('c1800s?', '180X%'), # Unspecified dates ('January 12', 'XXXX-01-12'), diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 877fd0b..026622c 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -153,9 +153,7 @@ # December 1760 or some later month ('[1760-12..]', ('1760-12-01', 'inf')), # January or February of 1760 or December 1760 or some later month - # This test is failing due to a code issue: - # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' - ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes + ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), # Either the year 1667 or the month December of 1760. ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), # Multiple Dates @@ -164,11 +162,11 @@ # The year 1960 and the month December of 1961. 
('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), - # Masked Precision --> eliminated + # Previously tested masked precision, now eliminated from the spec # A date during the 1960s - #('196x', '1960-01-01', '1969-12-31'), + ('196X', ('1960-01-01', '1969-12-31')), # A date during the 1900s - #('19xx', '1900-01-01', '1999-12-31'), + ('19XX', ('1900-01-01', '1999-12-31')), # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 @@ -180,6 +178,7 @@ ('Y17E7', ('170000000-01-01', '170000000-12-31')), # the year -170000000 ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), + # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), @@ -227,7 +226,6 @@ def iso_to_struct_time(iso_date): y *= -1 return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - @pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) def test_edtf_examples(test_input, expected_tuple): """ Test parsing of EDTF strings with expected outputs. """ @@ -245,25 +243,25 @@ def test_edtf_examples(test_input, expected_tuple): elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert result.lower_strict() == lower_strict, "Lower strict date does not match" - assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert result.upper_strict() == upper_strict, f"Upper strict date does not match. 
Expected {upper_strict}, got {result.upper_strict()}" elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert result.lower_strict() == strict_date, "Lower strict date does not match" - assert result.upper_strict() == strict_date, "Upper strict date does not match" - assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" - assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + assert result.lower_strict() == strict_date, f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" + assert result.upper_strict() == strict_date, f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert result.lower_strict() == lower_strict, "Lower strict date does not match" - assert result.upper_strict() == upper_strict, "Upper strict date does not match" - assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" - assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert result.upper_strict() == upper_strict, f"Upper strict date does not match. 
Expected {upper_strict}, got {result.upper_strict()}" + assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) From d23ff7b6932313c2d42f14ffeac2a3ffe9d32afd Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 13 May 2024 09:01:39 -0400 Subject: [PATCH 009/135] Remove masked precision and unspecified from README --- README.md | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 76aec1a..bf3155b 100644 --- a/README.md +++ b/README.md @@ -196,43 +196,33 @@ The parser can parse strings such as: 'c.1860' => '1860~' #with or without . 'ca1860' => '1860~' 'approx 1860' => '1860~' - - # masked precision - '1860s' => '186x' #186x has decade precision, 186u has year precision. - '1800s' => '18xx' # without uncertainty indicators, assume century - - # masked precision + uncertainty - 'ca. 1860s' => '186x~' - 'circa 1840s' => '184x~' - 'ca. 1860s?' => '186x?~' - 'c1800s?' => '180x?~' # with uncertainty indicators, use the decade + 'ca. 1860s' => '186X~' + 'circa 1840s' => '184X~' + 'ca. 1860s?' => '186X?~' + 'c1800s?' 
=> '180X?~' # with uncertainty indicators, use the decade # unspecified parts 'January 12' => 'XXXX-01-12' 'January' => 'XXXX-01' '7/2008' => '2008-07' + 'month in 1872' => '1872-XX' + 'day in January 1872' => '1872-01-XX' + 'day in 1872' => '1872-XX-XX' #seasons 'Autumn 1872' => '1872-23' 'Fall 1872' => '1872-23' # before/after - 'earlier than 1928' => 'unknown/1928' - 'later than 1928' => '1928/unknown' - 'before January 1928' => 'unknown/1928-01' - 'after about the 1920s' => '192x~/unknown' - - # unspecified - 'year in the 1860s' => '186u' #186x has decade precision, 186u has year precision. - ('year in the 1800s', '18xu') - 'month in 1872' => '1872-XX' - 'day in January 1872' => '1872-01-XX' - 'day in 1872' => '1872-XX-XX' + 'earlier than 1928' => '/1928' + 'later than 1928' => '1928/' + 'before January 1928' => '/1928-01' + 'after about the 1920s' => '192X~/' #centuries - '1st century' => '00xx' - '10c' => '09xx' - '19th century?' => '18xx?' + '1st century' => '00XX' + '10c' => '09XX' + '19th century?' => '18XX?' # just showing off now... 'a day in about Spring 1849?' => '1849-21-XX?~' @@ -243,8 +233,8 @@ The parser can parse strings such as: '1851-1852; printed 1853-1854' => '1851/1852' '1851-52' => '1851/1852' '1856-ca. 
1865' => '1856/1865~' - '1860s-1870s' => '186x/187x' - '1920s -early 1930s' => '192x/193x' + '1860s-1870s' => '186X/187X' + '1920s - early 1930s' => '192X/193X' '1938, printed 1940s-1950s' => '1938' From f1cd472916438b9c034959b7b7c7cfc420938d12 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 13 May 2024 15:08:50 -0400 Subject: [PATCH 010/135] Better grouping of group qualification tests --- edtf/parser/tests.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 026622c..817354a 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -111,29 +111,35 @@ ('2010-24', ('2010-12-01', '2010-12-31')), # ******************************* LEVEL 2 ********************************* - # Partial Uncertain/Approximate + # Qualification + # Group qualification: a qualification character to the immediate right of a component applies + # to that component as well as to all components to the left. 
+ # year, month, and day are uncertain and approximate + ('2004-06-11%', ('2004-06-11', '2004-06-09', '2004-06-13')), # uncertain year; month, day known ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), # year and month are approximate; day known ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), - # uncertain month, year and day known - ('2004-?06-11', ('2004-06-11', '2004-05-11', '2004-07-11')), + + # Qualification of individual component: a qualification character to the immediate left + # of the component applies to that component only # day is approximate; year, month known ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), - # Year known, month within year is approximate and uncertain - NEW SPEC + # Year known, month within year is approximate and uncertain ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), - # Year known, month and day uncertain - NEW SPEC + # Year known, month and day uncertain ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), - # Year uncertain, month known, day approximate - NEW SPEC + # Year uncertain, month known, day approximate ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), - # Year uncertain and month is both uncertain and approximate - NEW SPEC + # Year uncertain and month is both uncertain and approximate ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # This has the same meaning as the previous example.- NEW SPEC ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), - # Year uncertain, month and day approximate. - NEW SPEC + # Year uncertain, month and day approximate ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), - # Year known, month and day approximate. 
- NEW SPEC + # Year known, month and day approximate ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), + # Partial unspecified # December 25 sometime during the 1560s ('156X-12-25', ('1560-12-25', '1569-12-25')), @@ -180,7 +186,6 @@ ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) - # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons # Spring southern hemisphere, 2001 @@ -190,6 +195,7 @@ ) BAD_EXAMPLES = ( + # parentheses are not used for group qualification in the 2018 spec None, '', 'not a edtf string', From e8b643357c55d51677959feb0785776b50541425 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 16 May 2024 09:46:07 -0400 Subject: [PATCH 011/135] Update year prefix in docs --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bf3155b..9456dfa 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ Test coverage includes every example given in the spec table of features. * Years exceeding four digits: - >>> parse_edtf('y-12000') # 12000 years BCE - LongYear: 'y-12000' + >>> parse_edtf('Y-12000') # 12000 years BCE + LongYear: 'Y-12000' * Season: @@ -167,8 +167,8 @@ Test coverage includes every example given in the spec table of features. 
* Year requiring more than 4 digits - exponential form: - >>> parse_edtf('y-17e7') - ExponentialYear: 'y-17e7' + >>> parse_edtf('Y-17e7') + ExponentialYear: 'Y-17e7' ### Natural language representation From f74ae803d879c8e3f280b974772130711a4cdaa7 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 20 May 2024 22:28:09 -0400 Subject: [PATCH 012/135] Linting fixes --- edtf/natlang/en.py | 30 ++-- edtf/natlang/tests.py | 347 +++++++++++++++++++++--------------------- edtf/parser/tests.py | 74 +++++---- 3 files changed, 233 insertions(+), 218 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 6ecb190..f6eef54 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -14,10 +14,10 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r'(-?)([\dX])([\dX])([\dX])([\dX])' -LONG_YEAR_RE = r'Y(-?)([1-9]\d\d\d\d+)' -CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?' -CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)' +SHORT_YEAR_RE = r"(-?)([\dX])([\dX])([\dX])([\dX])" +LONG_YEAR_RE = r"Y(-?)([1-9]\d\d\d\d+)" +CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. @@ -101,9 +101,9 @@ def text_to_edtf(text): is_after = is_after or re.findall(r"\blater\b", t) if is_before: - result = f"/{result}" # unknown is replaced with null for intervals + result = f"/{result}" # unknown is replaced with null for intervals elif is_after: - result = f"{result}/" # unknown is replaced with null for intervals + result = f"{result}/" # unknown is replaced with null for intervals return result @@ -155,9 +155,8 @@ def text_to_edtf_date(text): is_ce = re.findall(CE_RE, t) if is_century: result = "%02dXX" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' 
+ CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t) + is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) try: is_bc = is_century[0][-1] in ("bc", "bce") @@ -221,14 +220,13 @@ def text_to_edtf_date(text): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. - if i == 2 and could_be_century and \ - not (is_approximate or is_uncertain): - result += 'X' + if i == 2 and could_be_century and not (is_approximate or is_uncertain): + result += "X" elif i == 3 and is_decade > 0: if mentions_year: - result += 'X' # previously year precision - now just X + result += "X" # previously year precision - now just X else: - result += 'X' # previously decade precision - now just X + result += "X" # previously decade precision - now just X elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default @@ -236,12 +234,12 @@ def text_to_edtf_date(text): else: # different values were produced, meaning that it's likely # a default. 
Use 'X' - result += 'X' + result += "X" # strip off unknown chars from end of string - except the first 4 for i in reversed(xrange(len(result))): - if result[i] not in ('X', '-'): + if result[i] not in ("X", "-"): smallest_length = 4 if mentions_month: diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 290fead..78ecbc9 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -4,185 +4,184 @@ from edtf.natlang.en import text_to_edtf + # TODO update the tests and code to test and output the new spec # where examples are tuples, the second item is the normalised output -@pytest.mark.parametrize("input_text,expected_output", [ - # Ignoring 'late' for simplicity in these examples - ('active late 17th-19th centuries', '16XX/18XX'), - ('active 17-19th Centuries', '16XX/18XX'), - - # Unrecognised values - ('', None), - ('this isn\'t a date', None), - - # Explicitly rejected values that would otherwise be badly converted - ('23rd Dynasty', None), - - # Implied century and specific years - ('90', '1990'), # Implied century - ('1860', '1860'), - ('the year 1800', '1800'), - ('the year 1897', '1897'), - ('January 2008', '2008-01'), - ('January 12, 1940', '1940-01-12'), - - # Uncertain or approximate dates - ('1860?', '1860?'), - ('1862 (uncertain)', '1862?'), - ('maybe 1862', '1862?'), - ('1862 maybe', '1862?'), - ('1862 guess', '1862?'), - ('uncertain: 1862', '1862?'), - ('uncertain: Jan 18 1862', '1862-01-18?'), - ('~ Feb 1812', '1812-02~'), - ('circa Feb 1812', '1812-02~'), - ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # Different abbreviations - ('c.1860', '1860~'), # With or without . - ('ca1860', '1860~'), - ('ca.1860', '1860~'), - ('c 1860', '1860~'), # With or without space - ('c. 1860', '1860~'), - ('ca. 
1860', '1860~'), - ('approx 1860', '1860~'), - ('1860 approx', '1860~'), - ('1860 approximately', '1860~'), - ('approximately 1860', '1860~'), - ('about 1860', '1860~'), - ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # Avoid words containing 'circa' - ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word - ('attic. 1802', '1802'), # Avoid false positive 'circa' - - # Previously tested masked precision, uncertain or ambiguous masked precision - ('1860s', '186X'), - ('ca. 1860s', '186X~'), - ('c. 1860s', '186X~'), - ('Circa 1840s', '184X~'), - ('circa 1840s', '184X~'), - ('ca. 1860s?', '186X%'), - ('uncertain: approx 1862', '1862%'), - ('1800s', '18XX'), - ('2000s', '20XX'), - ('c1900s', '190X~'), - ('c1800s?', '180X%'), - - # Unspecified dates - ('January 12', 'XXXX-01-12'), - ('January', 'XXXX-01'), - ('10/7/2008', '2008-10-07'), - ('7/2008', '2008-07'), - - # Seasons mapped to specific codes - ('Spring 1872', '1872-21'), - ('Summer 1872', '1872-22'), - ('Autumn 1872', '1872-23'), - ('Fall 1872', '1872-23'), - ('Winter 1872', '1872-24'), - - # Dates relative to known events (before/after) - ('earlier than 1928', '/1928'), - ('before 1928', '/1928'), - ('after 1928', '1928/'), - ('later than 1928', '1928/'), - ('before January 1928', '/1928-01'), - ('before 18 January 1928', '/1928-01-18'), - - # Approximations combined with before/after - ('before approx January 18 1928', '/1928-01-18~'), - ('before approx January 1928', '/1928-01~'), - ('after approx January 1928', '1928-01~/'), - ('after approx Summer 1928', '1928-22~/'), - - # Before and after with uncertain / unspecified components - ('after about the 1920s', '192X~/'), - ('before about the 1900s', '/190X~'), - ('before the 1900s', '/19XX'), - - # previous examples for masked precision, now removed from the EDTF spec - # use `X` for unknown regardless of precision or why the data is unknown - ('decade in 1800s', '18XX'), - ('decade somewhere during the 1800s', 
'18XX'), - ('year in the 1860s', '186X'), - ('year in the 1800s', '18XX'), - ('year in about the 1800s', '180X~'), - ('month in 1872', '1872-XX'), - ('day in Spring 1849', '1849-21-XX'), - ('day in January 1872', '1872-01-XX'), - ('day in 1872', '1872-XX-XX'), - ('birthday in 1872', '1872'), - - # Handling centuries with approximation and uncertainty - ('1st century', '00XX'), - ('10c', '09XX'), - ('19th century', '18XX'), - ('19th century?', '18XX?'), - ('before 19th century', '/18XX'), - ('19c', '18XX'), - ('15c.', '14XX'), - ('ca. 19c', '18XX~'), - ('~19c', '18XX~'), - ('about 19c', '18XX~'), - ('19c?', '18XX?'), - ('c.19c?', '18XX%'), - - # BC/AD dating - ('1 AD', '0001'), - ('17 CE', '0017'), - ('127 CE', '0127'), - ('1270 CE', '1270'), - ('c1 AD', '0001~'), - ('c17 CE', '0017~'), - ('c127 CE', '0127~'), - ('c1270 CE', '1270~'), - ('c64 BCE', '-0064~'), - ('2nd century bc', '-01XX'), # -200 to -101 - ('2nd century bce', '-01XX'), - ('2nd century ad', '01XX'), - ('2nd century ce', '01XX'), - - # Combining uncertainties and approximations in creative ways - ('a day in about Spring 1849?', '1849-21-XX%'), - - # Simple date ranges, showcasing both the limitations and capabilities of the parser - # Not all of these results are correct EDTF, but this is as good as the EDTF implementation - # and simple natural language parser we have. - ('1851-1852', '1851/1852'), - ('1851-1852; printed 1853-1854', '1851/1852'), - ('1851-52', '1851/1852'), - ('1852 - 1860', '1852/1860'), - ('1856-ca. 1865', '1856/1865~'), - ('1857-mid 1860s', '1857/186X'), - ('1858/1860', '[1858, 1860]'), - ('1860s-1870s', '186X/187X'), - ('1910-30', '1910/1930'), - ('active 1910-30', '1910/1930'), - ('1861-67', '1861/1867'), - ('1861-67 (later print)', '1861/1867'), - ('1863 or 1864', '1863'), - ('1863, printed 1870', '1863'), - ('1863, printed ca. 1866', '1863'), - ('1864 or 1866', '1864'), - ('1864, printed ca. 
1864', '1864'), - ('1864-1872, printed 1870s', '1864/1872'), - ('1868-1871?', '1868/1871?'), - ('1869-70', '1869/1870'), - ('1870s, printed ca. 1880s', '187X'), - ('1900-1903, cast before 1929', '1900/1903'), - ('1900; 1973', '1900'), - ('1900; printed 1912', '1900'), - ('1915 late - autumn 1916', '1915/1916-23'), - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} - ('1920s -early 1930s', '192X/193X'), - ('1930s, printed early 1960s', '193X'), # should be something like {193x, 196x}, - ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} -]) - +@pytest.mark.parametrize( + "input_text,expected_output", + [ + # Ignoring 'late' for simplicity in these examples + ("active late 17th-19th centuries", "16XX/18XX"), + ("active 17-19th Centuries", "16XX/18XX"), + # Unrecognised values + ("", None), + ("this isn't a date", None), + # Explicitly rejected values that would otherwise be badly converted + ("23rd Dynasty", None), + # Implied century and specific years + ("90", "1990"), # Implied century + ("1860", "1860"), + ("the year 1800", "1800"), + ("the year 1897", "1897"), + ("January 2008", "2008-01"), + ("January 12, 1940", "1940-01-12"), + # Uncertain or approximate dates + ("1860?", "1860?"), + ("1862 (uncertain)", "1862?"), + ("maybe 1862", "1862?"), + ("1862 maybe", "1862?"), + ("1862 guess", "1862?"), + ("uncertain: 1862", "1862?"), + ("uncertain: Jan 18 1862", "1862-01-18?"), + ("~ Feb 1812", "1812-02~"), + ("circa Feb 1812", "1812-02~"), + ("Feb 1812 approx", "1812-02~"), + ("c1860", "1860~"), # Different abbreviations + ("c.1860", "1860~"), # With or without . + ("ca1860", "1860~"), + ("ca.1860", "1860~"), + ("c 1860", "1860~"), # With or without space + ("c. 1860", "1860~"), + ("ca. 
1860", "1860~"), + ("approx 1860", "1860~"), + ("1860 approx", "1860~"), + ("1860 approximately", "1860~"), + ("approximately 1860", "1860~"), + ("about 1860", "1860~"), + ("about Spring 1849", "1849-21~"), + ("notcirca 1860", "1860"), # Avoid words containing 'circa' + ( + "attica 1802", + "1802", + ), # Avoid false positive 'circa' at the end of preceding word + ("attic. 1802", "1802"), # Avoid false positive 'circa' + # Previously tested masked precision, uncertain or ambiguous masked precision + ("1860s", "186X"), + ("ca. 1860s", "186X~"), + ("c. 1860s", "186X~"), + ("Circa 1840s", "184X~"), + ("circa 1840s", "184X~"), + ("ca. 1860s?", "186X%"), + ("uncertain: approx 1862", "1862%"), + ("1800s", "18XX"), + ("2000s", "20XX"), + ("c1900s", "190X~"), + ("c1800s?", "180X%"), + # Unspecified dates + ("January 12", "XXXX-01-12"), + ("January", "XXXX-01"), + ("10/7/2008", "2008-10-07"), + ("7/2008", "2008-07"), + # Seasons mapped to specific codes + ("Spring 1872", "1872-21"), + ("Summer 1872", "1872-22"), + ("Autumn 1872", "1872-23"), + ("Fall 1872", "1872-23"), + ("Winter 1872", "1872-24"), + # Dates relative to known events (before/after) + ("earlier than 1928", "/1928"), + ("before 1928", "/1928"), + ("after 1928", "1928/"), + ("later than 1928", "1928/"), + ("before January 1928", "/1928-01"), + ("before 18 January 1928", "/1928-01-18"), + # Approximations combined with before/after + ("before approx January 18 1928", "/1928-01-18~"), + ("before approx January 1928", "/1928-01~"), + ("after approx January 1928", "1928-01~/"), + ("after approx Summer 1928", "1928-22~/"), + # Before and after with uncertain / unspecified components + ("after about the 1920s", "192X~/"), + ("before about the 1900s", "/190X~"), + ("before the 1900s", "/19XX"), + # previous examples for masked precision, now removed from the EDTF spec + # use `X` for unknown regardless of precision or why the data is unknown + ("decade in 1800s", "18XX"), + ("decade somewhere during the 1800s", 
"18XX"), + ("year in the 1860s", "186X"), + ("year in the 1800s", "18XX"), + ("year in about the 1800s", "180X~"), + ("month in 1872", "1872-XX"), + ("day in Spring 1849", "1849-21-XX"), + ("day in January 1872", "1872-01-XX"), + ("day in 1872", "1872-XX-XX"), + ("birthday in 1872", "1872"), + # Handling centuries with approximation and uncertainty + ("1st century", "00XX"), + ("10c", "09XX"), + ("19th century", "18XX"), + ("19th century?", "18XX?"), + ("before 19th century", "/18XX"), + ("19c", "18XX"), + ("15c.", "14XX"), + ("ca. 19c", "18XX~"), + ("~19c", "18XX~"), + ("about 19c", "18XX~"), + ("19c?", "18XX?"), + ("c.19c?", "18XX%"), + # BC/AD dating + ("1 AD", "0001"), + ("17 CE", "0017"), + ("127 CE", "0127"), + ("1270 CE", "1270"), + ("c1 AD", "0001~"), + ("c17 CE", "0017~"), + ("c127 CE", "0127~"), + ("c1270 CE", "1270~"), + ("c64 BCE", "-0064~"), + ("2nd century bc", "-01XX"), # -200 to -101 + ("2nd century bce", "-01XX"), + ("2nd century ad", "01XX"), + ("2nd century ce", "01XX"), + # Combining uncertainties and approximations in creative ways + ("a day in about Spring 1849?", "1849-21-XX%"), + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. + ("1851-1852", "1851/1852"), + ("1851-1852; printed 1853-1854", "1851/1852"), + ("1851-52", "1851/1852"), + ("1852 - 1860", "1852/1860"), + ("1856-ca. 1865", "1856/1865~"), + ("1857-mid 1860s", "1857/186X"), + ("1858/1860", "[1858, 1860]"), + ("1860s-1870s", "186X/187X"), + ("1910-30", "1910/1930"), + ("active 1910-30", "1910/1930"), + ("1861-67", "1861/1867"), + ("1861-67 (later print)", "1861/1867"), + ("1863 or 1864", "1863"), + ("1863, printed 1870", "1863"), + ("1863, printed ca. 1866", "1863"), + ("1864 or 1866", "1864"), + ("1864, printed ca. 
1864", "1864"), + ("1864-1872, printed 1870s", "1864/1872"), + ("1868-1871?", "1868/1871?"), + ("1869-70", "1869/1870"), + ("1870s, printed ca. 1880s", "187X"), + ("1900-1903, cast before 1929", "1900/1903"), + ("1900; 1973", "1900"), + ("1900; printed 1912", "1900"), + ("1915 late - autumn 1916", "1915/1916-23"), + ("1915, from Camerawork, October 1916", "1915"), # should be {1915, 1916-10} + ("1920s -early 1930s", "192X/193X"), + ( + "1930s, printed early 1960s", + "193X", + ), # should be something like {193x, 196x}, + ("1932, printed 1976 by Gunther Sander", "1932"), # should be {1932, 1976} + ( + "1938, printed 1940s-1950s", + "1938", + ), # should be something like {1938, 194x-195x} + ], +) def test_natlang(input_text, expected_output): """ Test natural language conversion to EDTF format: Verify that the conversion from text to EDTF format matches the expected output. """ result = text_to_edtf(input_text) - assert result == expected_output, f"Failed for input: {input_text} - expected {expected_output}, got {result}" + assert ( + result == expected_output + ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index ae82057..69891b0 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -117,30 +117,29 @@ # Group qualification: a qualification character to the immediate right of a component applies # to that component as well as to all components to the left. 
# year, month, and day are uncertain and approximate - ('2004-06-11%', ('2004-06-11', '2004-06-09', '2004-06-13')), + ("2004-06-11%", ("2004-06-11", "2004-06-09", "2004-06-13")), # uncertain year; month, day known ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known - ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), - + ("2004-06~-11", ("2004-06-11", "2003-05-11", "2005-07-11")), # Qualification of individual component: a qualification character to the immediate left # of the component applies to that component only # day is approximate; year, month known - ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), + ("2004-06-~11", ("2004-06-11", "2004-06-10", "2004-06-12")), # Year known, month within year is approximate and uncertain - ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), + ("2004-%06", ("2004-06-01", "2004-06-30", "2004-04-01", "2004-08-30")), # Year known, month and day uncertain - ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), + ("2004-?06-?11", ("2004-06-11", "2004-05-10", "2004-07-12")), # Year uncertain, month known, day approximate - ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), + ("2004?-06-~11", ("2004-06-11", "2003-06-10", "2005-06-12")), # Year uncertain and month is both uncertain and approximate - ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), + ("?2004-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), + ("2004?-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), # Year uncertain, month and day approximate - ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), + ("2004?-~06-~04", ("2004-06-04", "2003-05-03", "2005-07-05")), # Year known, month and day approximate - ('2011-~06-~04', ('2011-06-04', '2011-05-03', 
'2011-07-05')), + ("2011-~06-~04", ("2011-06-04", "2011-05-03", "2011-07-05")), # Partial unspecified # December 25 sometime during the 1560s ("156X-12-25", ("1560-12-25", "1569-12-25")), @@ -159,21 +158,19 @@ # December 1760 or some later month ("[1760-12..]", ("1760-12-01", "inf")), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), + ("[1760-01, 1760-02, 1760-12..]", ("1760-01-01", "inf")), # Either the year 1667 or the month December of 1760. ("[1667, 1760-12]", ("1667-01-01", "1760-12-31")), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 ("{1667,1668, 1670..1672}", ("1667-01-01", "1672-12-31")), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), - + ("{1960, 1961-12}", ("1960-01-01", "1961-12-31")), # Previously tested masked precision, now eliminated from the spec # A date during the 1960s - ('196X', ('1960-01-01', '1969-12-31')), + ("196X", ("1960-01-01", "1969-12-31")), # A date during the 1900s - ('19XX', ('1900-01-01', '1999-12-31')), - + ("19XX", ("1900-01-01", "1999-12-31")), # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 ( @@ -186,7 +183,7 @@ # the year 170000000 ("Y17E7", ("170000000-01-01", "170000000-12-31")), # the year -170000000 - ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), + ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), @@ -236,6 +233,7 @@ def iso_to_struct_time(iso_date): y *= -1 return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + @pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) def test_edtf_examples(test_input, expected_tuple): """Test parsing of EDTF strings with expected outputs.""" @@ -255,25 +253,45 @@ def test_edtf_examples(test_input, expected_tuple): elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert result.upper_strict() == upper_strict, f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert ( + result.lower_strict() == lower_strict + ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert ( + result.upper_strict() == upper_strict + ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert result.lower_strict() == strict_date, f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" - assert result.upper_strict() == strict_date, f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" - assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert ( + result.lower_strict() == strict_date + ), f"Lower strict date does not match. 
Expected {strict_date}, got {result.lower_strict()}" + assert ( + result.upper_strict() == strict_date + ), f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + assert ( + result.lower_fuzzy() == lower_fuzzy + ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert ( + result.upper_fuzzy() == upper_fuzzy + ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert result.lower_strict() == lower_strict, f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert result.upper_strict() == upper_strict, f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" - assert result.lower_fuzzy() == lower_fuzzy, f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert result.upper_fuzzy() == upper_fuzzy, f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert ( + result.lower_strict() == lower_strict + ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert ( + result.upper_strict() == upper_strict + ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert ( + result.lower_fuzzy() == lower_fuzzy + ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert ( + result.upper_fuzzy() == upper_fuzzy + ), f"Upper fuzzy date does not match. 
Expected {upper_fuzzy}, got {result.upper_fuzzy()}" @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) From 26b0afb312115ac691e06ef9b03561ad283a90f2 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 21 May 2024 09:30:34 -0400 Subject: [PATCH 013/135] Fix qualification (complete) for L1 qualification Apply it to the entire date when a date is parsed as UncertainOrApproximate (L1 qualified) --- edtf/parser/parser_classes.py | 26 +++++++++++++++++++------- edtf/parser/tests.py | 22 +++++++++++++++------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2b4368a..bb9a213 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -442,15 +442,27 @@ def _strict_date(self, lean): def _get_fuzzy_padding(self, lean): if not self.ua: - return relativedelta(0) + return relativedelta() multiplier = self.ua._get_multiplier() + padding = relativedelta() + + # Check the presence of uncertainty on each component + # self.precision not helpful here: + # L1 qualified EDTF dates apply qualification across all parts of the date + if self.date.year: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + if self.date.month: + padding += relativedelta( + months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) + ) + if self.date.day: + padding += relativedelta( + days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) + ) - if self.date.precision == PRECISION_DAY: - return multiplier * appsettings.PADDING_DAY_PRECISION - elif self.date.precision == PRECISION_MONTH: - return multiplier * appsettings.PADDING_MONTH_PRECISION - elif self.date.precision == PRECISION_YEAR: - return multiplier * appsettings.PADDING_YEAR_PRECISION + return padding class UnspecifiedIntervalSection(EDTFObject): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 69891b0..8d9a770 
100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -61,8 +61,11 @@ # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely ("1984?", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), - ("2004-06-11?", ("2004-06-11", "2004-06-11", "2004-06-10", "2004-06-12")), - ("2004-06?", ("2004-06-01", "2004-06-30", "2004-05-01", "2004-07-30")), + ( + "2004-06-11?", + ("2004-06-11", "2003-05-10", "2005-07-12"), + ), # everything is fuzzy by 100% for "qualification of a date (complete)" (L1) + ("2004-06?", ("2004-06-01", "2004-06-30", "2003-05-01", "2005-07-30")), # "approximately" the year 1984 ("1984~", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), # the year is approximately 1984 and even that is uncertain @@ -84,6 +87,7 @@ ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), # L1 Extended Interval # beginning unknown, end 2006 + # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) ("/2006", ("1996-12-31", "2006-12-31")), # beginning June 1, 2004, end unknown ("2004-06-01/", ("2004-06-01", "2014-06-01")), @@ -94,16 +98,16 @@ # interval beginning approximately 1984 and ending June 2004 ("1984~/2004-06", ("1984-01-01", "2004-06-30", "1983-01-01", "2004-06-30")), # interval beginning 1984 and ending approximately June 2004 - ("1984/2004-06~", ("1984-01-01", "2004-06-30", "1984-01-01", "2004-07-30")), + ("1984/2004-06~", ("1984-01-01", "2004-06-30", "1984-01-01", "2005-07-30")), ("1984?/2004%", ("1984-01-01", "2004-12-31", "1983-01-01", "2006-12-31")), ("1984~/2004~", ("1984-01-01", "2004-12-31", "1983-01-01", "2005-12-31")), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1984-05-01", "2004-09-30")), + ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1983-05-01", "2005-09-30")), ( 
"1984-06-02?/2004-08-08~", - ("1984-06-02", "2004-08-08", "1984-06-01", "2004-08-09"), + ("1984-06-02", "2004-08-08", "1983-05-01", "2005-09-09"), ), - ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1984-06-01", "1994-06-02")), + ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1983-05-01", "1994-06-02")), # Year exceeding 4 digits ("Y170000002", ("170000002-01-01", "170000002-12-31")), ("Y-170000002", ("-170000002-01-01", "-170000002-12-31")), @@ -117,7 +121,11 @@ # Group qualification: a qualification character to the immediate right of a component applies # to that component as well as to all components to the left. # year, month, and day are uncertain and approximate - ("2004-06-11%", ("2004-06-11", "2004-06-09", "2004-06-13")), + # this example appears under "group qualification" but actually parses as L1 UncertainOrApproximate + ( + "2004-06-11%", + ("2004-06-11", "2002-04-09", "2006-08-13"), + ), # all parts to the left are fuzzy by 200% # uncertain year; month, day known ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known From 48a9b02749076c6ae29b468724ef1ad65439b35e Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:28:44 -0400 Subject: [PATCH 014/135] Add code coverage The ci.yml updates add a commit to PRs. I tested this locally using `act` as best I could, but ran into an issue that I think will resolve when running on the real runner (undefined head). We'll see how it works when the workflows actually run ... 
--- .github/workflows/ci.yml | 34 ++++++++++++++++++++++++++++++++-- .gitignore | 3 +++ README.md | 4 ++++ pyproject.toml | 22 +++++++++++++++++++++- 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b41c764..be0326d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.12"] + # python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] defaults: run: working-directory: . @@ -38,8 +39,37 @@ jobs: - name: Run unit tests run: | pytest + mv .coverage .coverage_main - name: Run Django integration tests working-directory: ./edtf_django_tests run: | - python manage.py test edtf_integration + coverage run manage.py test edtf_integration + mv .coverage ../.coverage_django + + - name: Combine coverage reports + run: | + coverage combine .coverage_main .coverage_django + coverage report --omit="edtf_django_tests/*" + coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" + + - name: Pytest coverage comment + uses: MishaKav/pytest-coverage-comment@main + with: + pytest-xml-coverage-path: ./coverage_combined.xml + unique-id-for-comment: ${{ matrix.python-version }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check the output coverage + run: | + echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" + echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" + echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" + echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" + echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" + echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" + echo "Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" + echo "Coverage Skipped - ${{ steps.coverageComment.outputs.skipped }}" + echo 
"Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" + echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" + echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" diff --git a/.gitignore b/.gitignore index 7c23190..182cf8b 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,9 @@ htmlcov/ .cache nosetests.xml coverage.xml +coverage_combined.xml +.coverage_main +.coverage_django *,cover # Translations diff --git a/README.md b/README.md index 82a9b7d..c4f172e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ edtf ===== + + + An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. See http://www.loc.gov/standards/datetime/ for the current draft specification. @@ -376,6 +379,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` +- To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. 
Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` ### Linting and formatting - Check linting: `ruff check --output-format=github --config pyproject.toml` diff --git a/pyproject.toml b/pyproject.toml index 869daf6..8dea9fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,8 @@ test = [ "pytest", "ruff", "pre-commit", + "coverage", + "pytest-cov" ] [project.urls] @@ -77,7 +79,25 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/" +addopts = "--ignore=edtf_django_tests/ --cov=edtf --cov-report=xml" +plugins = ["pytest_cov"] + +[tool.coverage.run] +# we run the edtf_integration tests but only care about them testing fields.py in the main package +omit = [ + "edtf_django_tests/*" +] + +[tool.coverage.report] +exclude_lines = [ + # Don't complain about missing debug-only code: + "if __name__ == .__main__.:", + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + "raise NotImplemented", + "raise NotImplemented" +] [tool.ruff] # Python 3.8 From 09b10d8ca6e196558523f37afa15cffa2c78b2d0 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:28:57 -0400 Subject: [PATCH 015/135] Create coverage_readme.yml The new workflow adds a badge to the readme based on coverage for Python 3.12. 
--- .github/workflows/coverage_readme.yml | 68 +++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/coverage_readme.yml diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml new file mode 100644 index 0000000..86309de --- /dev/null +++ b/.github/workflows/coverage_readme.yml @@ -0,0 +1,68 @@ +name: Update Coverage on Readme +on: + push: + branches: + - main + +# https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs +# `contents` is for permission to the contents of the repository. +# `pull-requests` is for permission to pull request +permissions: + contents: write + checks: write + pull-requests: write + +# see: https://github.com/MishaKav/pytest-coverage-comment +jobs: + update-coverage-on-readme: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + cache: 'pip' + cache-dependency-path: '**/pyproject.toml' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[test] + + - name: Run tests and generate coverage + run: | + pytest + mv .coverage .coverage_main + cd edtf_django_tests + coverage run manage.py test edtf_integration + mv .coverage ../.coverage_django + cd .. 
+ coverage combine .coverage_main .coverage_django + coverage report --omit="edtf_django_tests/*" + coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" + + - name: Pytest coverage comment + if: ${{ github.ref == 'refs/heads/main' }} + id: coverageComment + uses: MishaKav/pytest-coverage-comment@main + with: + pytest-xml-coverage-path: ./coverage_combined.xml + hide-comment: true + + - name: Update Readme with Coverage Html + if: ${{ github.ref == 'refs/heads/main' }} + run: | + sed -i '//,//c\\n\${{ steps.coverageComment.outputs.coverageHtml }}\n' ./README.md + + - name: Commit & Push changes to README + run: | + git config --global user.name 'github-actions[bot]' + git config --global user.email 'github-actions[bot]@users.noreply.github.com' + git add README.md + git commit -m 'Update coverage badge in README' + git push From ac4705f5bd7b41f49f45458f053f7f55d468a29a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:29:16 -0400 Subject: [PATCH 016/135] Remove unnecessary files --- edtf/parser/grammar_test.py | 360 ------------ edtf/parser/parser_classes_tests.py | 834 ---------------------------- vagrant wheel install problems.txt | 5 - 3 files changed, 1199 deletions(-) delete mode 100644 edtf/parser/grammar_test.py delete mode 100644 edtf/parser/parser_classes_tests.py delete mode 100644 vagrant wheel install problems.txt diff --git a/edtf/parser/grammar_test.py b/edtf/parser/grammar_test.py deleted file mode 100644 index c8ff727..0000000 --- a/edtf/parser/grammar_test.py +++ /dev/null @@ -1,360 +0,0 @@ -from pyparsing import ( - Combine, - NotAny, - OneOrMore, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - nums, - oneOf, -) -from pyparsing import Literal as L - -from edtf.parser.edtf_exceptions import EDTFParseException - -# (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import ( - UA, - Consecutives, - 
Date, - DateAndTime, - EarlierConsecutives, - ExponentialYear, - Interval, - LaterConsecutives, - Level1Interval, - Level2Interval, # , Testi - LongYear, - MultipleDates, - OneOfASet, - PartialUncertainOrApproximate, - PartialUnspecified, - Season, - UncertainOrApproximate, - Unspecified, -) - -oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) -oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) -oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) -zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) -oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) -oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) -oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) -oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) -zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) - -positiveDigit = Word(nums, exact=1, excludeChars="0") -digit = Word(nums, exact=1) - -second = zeroThru59 -minute = zeroThru59 -hour = zeroThru23 -day = oneThru31("day") - -month = oneThru12("month") -monthDay = ( - (oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) - ^ (oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) - ^ (L("02")("month") + "-" + oneThru29("day")) -) - -# 4 digits, 0 to 9 -positiveYear = Word(nums, exact=4) - -# Negative version of positive year, but "-0000" is illegal -negativeYear = NotAny(L("-0000")) + ("-" + positiveYear) - -year = Combine(positiveYear ^ negativeYear)("year") - -yearMonth = year + "-" + month -yearMonthDay = year + "-" + monthDay # o hai iso date - -date = Combine(year ^ yearMonth ^ yearMonthDay)("date") -Date.set_parser(date) - -zoneOffsetHour = oneThru13 -zoneOffset = L("Z") ^ ( - Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) -) - -baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") - -time = Combine(baseTime + Optional(zoneOffset))("time") - -dateAndTime = date + "T" + time -DateAndTime.set_parser(dateAndTime) - -l0Interval = date("lower") + "/" + 
date("upper") -Interval.set_parser(l0Interval) - -level0Expression = date ^ dateAndTime ^ l0Interval - - -# (* ************************** Level 1 *************************** *) - -# (* ** Auxiliary Assignments for Level 1 ** *) -UASymbol = Combine(oneOf("? ~ %")) -UA.set_parser(UASymbol) - -seasonNumber = oneOf("21 22 23 24") - -# (* *** Season (unqualified) *** *) -season = year + "-" + seasonNumber("season") -Season.set_parser(season) - -dateOrSeason = date("") ^ season - -# (* *** Long Year - Simple Form *** *) - -longYearSimple = "Y" + Combine( - Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit) -)("year") -LongYear.set_parser(longYearSimple) - -# (* *** L1Interval *** *) -uaDateOrSeason = dateOrSeason + Optional(UASymbol) - - -# unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') -# Testi.set_parser(unspecifiedIntervalSec) - - -# bit of a kludge here to get the all the relevant tokens into the parse action -# cleanly otherwise the parameter names are overlapped. -def f(toks): - try: - return {"date": toks[0], "ua": toks[1]} - except IndexError: - return {"date": toks[0], "ua": None} - - -l1Start = ".." ^ uaDateOrSeason -# l1Start = unspecifiedIntervalSec ^ uaDateOrSeason -l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ ".." 
-l1End.addParseAction(f) - -# level1Interval = l1Start("lower") + "/" + l1End("upper") -level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( - "lower" -) + "/" + Optional(l1End("upper")) -Level1Interval.set_parser(level1Interval) - -# (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") -monthUnspecified = year + "-" + L("XX")("month") -dayUnspecified = yearMonth + "-" + L("XX")("day") -dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") - -unspecified = ( - yearWithOneOrTwoUnspecifedDigits - ^ monthUnspecified - ^ dayUnspecified - ^ dayAndMonthUnspecified -) -Unspecified.set_parser(unspecified) - -# (* *** uncertainOrApproxDate *** *) - -uncertainOrApproxDate = date("date") + UASymbol("ua") -UncertainOrApproximate.set_parser(uncertainOrApproxDate) - -level1Expression = ( - uncertainOrApproxDate ^ unspecified ^ level1Interval ^ longYearSimple ^ season -) - -# (* ************************** Level 2 *************************** *) - -# (* ** Internal Unspecified** *) - -digitOrU = Word(nums + "X", exact=1) - -# 2-digit day with at least one 'X' present -dayWithU = Combine(("X" + digitOrU) ^ (digitOrU + "X"))("day") - -# 2-digit month with at least one 'X' present -monthWithU = Combine(oneOf("0X 1X") ^ ("X" + digitOrU))("month") - -# 4-digit year with at least one 'X' present -yearWithU = Combine( - ("X" + digitOrU + digitOrU + digitOrU) - ^ (digitOrU + "X" + digitOrU + digitOrU) - ^ (digitOrU + digitOrU + "X" + digitOrU) - ^ (digitOrU + digitOrU + digitOrU + "X") -)("year") - -yearMonthWithU = (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) ^ ( - yearWithU + "-" + month -) - -monthDayWithU = (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) ^ ( - monthWithU + "-" + day -) - -yearMonthDayWithU = ( - ( - yearWithU - + "-" - + Combine(month("") ^ monthWithU(""))("month") - + "-" - + Combine(day("") ^ dayWithU(""))("day") - ) - ^ 
(year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) - ^ (year + "-" + month + "-" + dayWithU) -) - -partialUnspecified = yearWithU ^ yearMonthWithU ^ yearMonthDayWithU -PartialUnspecified.set_parser(partialUnspecified) - -# (* ** Internal Uncertain or Approximate** *) - -# this line is out of spec, but the given examples (e.g. '(2004)?-06-04~') -# appear to require it. -year_with_brackets = year ^ ("(" + year + ")") - -# second clause below needed Optional() around the "year_ua" UASymbol, for dates -# like '(2011)-06-04~' to work. - -IUABase = ( - ( - year_with_brackets - + UASymbol("year_ua") - + "-" - + month - + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) - ^ ( - year_with_brackets - + Optional(UASymbol)("year_ua") - + "-" - + monthDay - + Optional(UASymbol)("month_day_ua") - ) - ^ ( - year_with_brackets - + Optional(UASymbol)("year_ua") - + "-(" - + month - + ")" - + UASymbol("month_ua") - + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) - ^ ( - year_with_brackets - + Optional(UASymbol)("year_ua") - + "-(" - + month - + ")" - + UASymbol("month_ua") - + Optional("-" + day) - ) - ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) - ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) - ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) - ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) - ^ (season("ssn") + UASymbol("season_ua")) -) - -partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) -PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) - -dateWithInternalUncertainty = partialUncertainOrApproximate ^ partialUnspecified - -qualifyingString = Regex(r"\S") # any nonwhitespace char - -# (* ** SeasonQualified ** *) -seasonQualifier = qualifyingString -seasonQualified = season + "^" + seasonQualifier - -# (* ** Long Year - Scientific Form ** *) -positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = ( - "Y" - 
+ Combine(Optional("-") + positiveInteger)("base") - + "E" - + positiveInteger("exponent") - + Optional("S" + positiveInteger("precision")) -) -ExponentialYear.set_parser(longYearScientific) - -# (* ** level2Interval ** *) -level2Interval = ( - (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) - ^ ( - dateWithInternalUncertainty("lower") - + "/" - + dateWithInternalUncertainty("upper") - ) -) -Level2Interval.set_parser(level2Interval) - -# (* ** Masked precision ** *) eliminated in latest specs -# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") -# MaskedPrecision.set_parser(maskedPrecision) - -# (* ** Inclusive list and choice list** *) -consecutives = ( - (yearMonthDay("lower") + ".." + yearMonthDay("upper")) - ^ (yearMonth("lower") + ".." + yearMonth("upper")) - ^ (year("lower") + ".." + year("upper")) -) -Consecutives.set_parser(consecutives) - -listElement = ( - date - ^ dateWithInternalUncertainty - ^ uncertainOrApproxDate - ^ unspecified - ^ consecutives -) - -earlier = ".." + date("upper") -EarlierConsecutives.set_parser(earlier) -later = date("lower") + ".." 
-LaterConsecutives.set_parser(later) - -listContent = ( - (earlier + ZeroOrMore("," + listElement)) - ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) - ^ (listElement + OneOrMore("," + listElement)) - ^ consecutives -) - -choiceList = "[" + listContent + "]" -OneOfASet.set_parser(choiceList) - -inclusiveList = "{" + listContent + "}" -MultipleDates.set_parser(inclusiveList) - -level2Expression = ( - partialUncertainOrApproximate - ^ partialUnspecified - ^ choiceList - ^ inclusiveList - ^ level2Interval - ^ longYearScientific - ^ seasonQualified -) - -# putting it all together -edtfParser = ( - level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") -) - - -def parse_edtf(str, parseAll=True, fail_silently=False): - try: - if not str: - raise ParseException("You must supply some input text") - p = edtfParser.parseString(str.strip(), parseAll) - if p: - return p[0] - except ParseException as err: - if fail_silently: - return None - raise EDTFParseException(err) from err diff --git a/edtf/parser/parser_classes_tests.py b/edtf/parser/parser_classes_tests.py deleted file mode 100644 index 857d0f6..0000000 --- a/edtf/parser/parser_classes_tests.py +++ /dev/null @@ -1,834 +0,0 @@ -# ruff: noqa: S101 # Asserts are ok in tests - -import calendar -import re -from datetime import date, datetime -from operator import add, sub -from time import struct_time - -from dateutil.relativedelta import relativedelta - -from edtf import appsettings -from edtf.convert import ( - TIME_EMPTY_EXTRAS, - TIME_EMPTY_TIME, - dt_to_struct_time, - trim_struct_time, -) - -EARLIEST = "earliest" -LATEST = "latest" - -PRECISION_MILLENIUM = "millenium" -PRECISION_CENTURY = "century" -PRECISION_DECADE = "decade" -PRECISION_YEAR = "year" -PRECISION_MONTH = "month" -PRECISION_SEASON = "season" -PRECISION_DAY = "day" - - -def days_in_month(year, month): - """ - Return the number of days in the given year and month, where month is - 1=January to 12=December, 
and respecting leap years as identified by - `calendar.isleap()` - """ - return { - 1: 31, - 2: 29 if calendar.isleap(year) else 28, - 3: 31, - 4: 30, - 5: 31, - 6: 30, - 7: 31, - 8: 31, - 9: 30, - 10: 31, - 11: 30, - 12: 31, - }[month] - - -def apply_delta(op, time_struct, delta): - """ - Apply a `relativedelta` to a `struct_time` data structure. - - `op` is an operator function, probably always `add` or `sub`tract to - correspond to `a_date + a_delta` and `a_date - a_delta`. - - This function is required because we cannot use standard `datetime` module - objects for conversion when the date/time is, or will become, outside the - boundary years 1 AD to 9999 AD. - """ - if not delta: - return time_struct # No work to do - - try: - dt_result = op(datetime(*time_struct[:6]), delta) - return dt_to_struct_time(dt_result) - except (OverflowError, ValueError): - # Year is not within supported 1 to 9999 AD range - pass - - # Here we fake the year to one in the acceptable range to avoid having to - # write our own date rolling logic - - # Adjust the year to be close to the 2000 millenium in 1,000 year - # increments to try and retain accurate relative leap years - actual_year = time_struct.tm_year - millenium = int(float(actual_year) / 1000) - millenium_diff = (2 - millenium) * 1000 - adjusted_year = actual_year + millenium_diff - # Apply delta to the date/time with adjusted year - dt = datetime(*(adjusted_year,) + time_struct[1:6]) - dt_result = op(dt, delta) - # Convert result year back to its original millenium - final_year = dt_result.year - millenium_diff - return struct_time( - (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS) - ) - - -class EDTFObject: - """ - Object to attact to a parser to become instantiated when the parser - completes. 
- """ - - parser = None - - @classmethod - def set_parser(cls, p): - cls.parser = p - p.addParseAction(cls.parse_action) - - @classmethod - def parse_action(cls, toks): - kwargs = toks.asDict() - try: - return cls(**kwargs) # replace the token list with the class - except Exception as e: - print(f"trying to {cls.__name__}.__init__(**{kwargs})") - raise e - - @classmethod - def parse(cls, s): - return cls.parser.parseString(s)[0] - - def __repr__(self): - return f"{type(self).__name__}: '{str(self)}'" - - def __init__(self, *args, **kwargs): - str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" - raise NotImplementedError(f"{str} is not implemented.") - - def __str__(self): - raise NotImplementedError - - def _strict_date(self, lean): - raise NotImplementedError - - def lower_strict(self): - return self._strict_date(lean=EARLIEST) - - def upper_strict(self): - return self._strict_date(lean=LATEST) - - def _get_fuzzy_padding(self, lean): - """ - Subclasses should override this to pad based on how precise they are. 
- """ - return relativedelta(0) - - def get_is_approximate(self): - return getattr(self, "_is_approximate", False) - - def set_is_approximate(self, val): - self._is_approximate = val - - is_approximate = property(get_is_approximate, set_is_approximate) - - def get_is_uncertain(self): - return getattr(self, "_is_uncertain", False) - - def set_is_uncertain(self, val): - self._is_uncertain = val - - is_uncertain = property(get_is_uncertain, set_is_uncertain) - - def get_is_uncertain_and_approximate(self): - return getattr(self, "_uncertain_and_approximate", False) - - def set_is_uncertain_and_approximate(self, val): - self._uncertain_and_approximate = val - - is_uncertain_and_approximate = property( - get_is_uncertain_and_approximate, set_is_uncertain_and_approximate - ) - - def lower_fuzzy(self): - strict_val = self.lower_strict() - return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - - def upper_fuzzy(self): - strict_val = self.upper_strict() - return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - - def __eq__(self, other): - if isinstance(other, EDTFObject): - return str(self) == str(other) - elif isinstance(other, date): - return str(self) == other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() == trim_struct_time(other) - return False - - def __ne__(self, other): - if isinstance(other, EDTFObject): - return str(self) != str(other) - elif isinstance(other, date): - return str(self) != other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() != trim_struct_time(other) - return True - - def __gt__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() > other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() > dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() > trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - 
- def __ge__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() >= other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() >= dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() >= trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - - def __lt__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() < other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() < dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() < trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - - def __le__(self, other): - if isinstance(other, EDTFObject): - return self.lower_strict() <= other.lower_strict() - elif isinstance(other, date): - return self.lower_strict() <= dt_to_struct_time(other) - elif isinstance(other, struct_time): - return self.lower_strict() <= trim_struct_time(other) - raise TypeError( - f"can't compare {type(self).__name__} with {type(other).__name__}" - ) - - -# (* ************************** Level 0 *************************** *) - - -class Date(EDTFObject): - def set_year(self, y): - if y is None: - raise AttributeError("Year must not be None") - self._year = y - - def get_year(self): - return self._year - - year = property(get_year, set_year) - - def set_month(self, m): - self._month = m - if m is None: - self.day = None - - def get_month(self): - return self._month - - month = property(get_month, set_month) - - def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return - - self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
- self.month = month - self.day = day - - def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" - return r - - def isoformat(self, default=date.max): - return "%s-%02d-%02d" % ( - self.year, - int(self.month or default.month), - int(self.day or default.day), - ) - - def _precise_year(self, lean): - # Replace any ambiguous characters in the year string with 0s or 9s - if lean == EARLIEST: - return int(re.sub(r"X", r"0", self.year)) - else: - return int(re.sub(r"X", r"9", self.year)) - - def _precise_month(self, lean): - if self.month and self.month != "XX": - try: - return int(self.month) - except ValueError as err: - raise ValueError( - f"Couldn't convert {self.month} to int (in {self})" - ) from err - else: - return 1 if lean == EARLIEST else 12 - - def _precise_day(self, lean): - if not self.day or self.day == "XX": - if lean == EARLIEST: - return 1 - else: - return days_in_month( - self._precise_year(LATEST), self._precise_month(LATEST) - ) - else: - return int(self.day) - - def _strict_date(self, lean): - """ - Return a `time.struct_time` representation of the date. 
- """ - return struct_time( - ( - self._precise_year(lean), - self._precise_month(lean), - self._precise_day(lean), - ) - + tuple(TIME_EMPTY_TIME) - + tuple(TIME_EMPTY_EXTRAS) - ) - - @property - def precision(self): - if self.day: - return PRECISION_DAY - if self.month: - return PRECISION_MONTH - return PRECISION_YEAR - - -class DateAndTime(EDTFObject): - def __init__(self, date, time): - self.date = date - self.time = time - - def __str__(self): - return self.isoformat() - - def isoformat(self): - return self.date.isoformat() + "T" + self.time - - def _strict_date(self, lean): - return self.date._strict_date(lean) - - def __eq__(self, other): - if isinstance(other, datetime): - return self.isoformat() == other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() == trim_struct_time(other) - return super().__eq__(other) - - def __ne__(self, other): - if isinstance(other, datetime): - return self.isoformat() != other.isoformat() - elif isinstance(other, struct_time): - return self._strict_date() != trim_struct_time(other) - return super().__ne__(other) - - -class Interval(EDTFObject): - def __init__(self, lower, upper): - self.lower = lower - self.upper = upper - - def __str__(self): - return f"{self.lower}/{self.upper}" - - def _strict_date(self, lean): - if lean == EARLIEST: - try: - r = self.lower._strict_date(lean) - if r is None: - raise AttributeError - return r - except ( - AttributeError - ): # it's a string, or no date. 
Result depends on the upper date - upper = self.upper._strict_date(LATEST) - return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) - else: - try: - r = self.upper._strict_date(lean) - if r is None: - raise AttributeError - return r - except ( - AttributeError - ): # an 'unknown' or 'open' string - depends on the lower date - if self.upper and (self.upper == "open" or self.upper.date == "open"): - return dt_to_struct_time(date.today()) # it's still happening - else: - lower = self.lower._strict_date(EARLIEST) - return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) - - -# (* ************************** Level 1 *************************** *) - - -class UA(EDTFObject): - @classmethod - def parse_action(cls, toks): - args = toks.asList() - return cls(*args) - - def __init__(self, *args): - assert len(args) == 1 - ua = args[0] - - self.is_uncertain = "?" in ua - self.is_approximate = "~" in ua - self.is_uncertain_and_approximate = "%" in ua - - def __str__(self): - d = "" - if self.is_uncertain: - d += "?" 
- if self.is_approximate: - d += "~" - if self.is_uncertain_and_approximate: - d += "%" - return d - - def _get_multiplier(self): - if self.is_uncertain_and_approximate: - return appsettings.MULTIPLIER_IF_BOTH - elif self.is_uncertain: - return appsettings.MULTIPLIER_IF_UNCERTAIN - elif self.is_approximate: - return appsettings.MULTIPLIER_IF_APPROXIMATE - - -class UncertainOrApproximate(EDTFObject): - def __init__(self, date, ua): - self.date = date - self.ua = ua - - def __str__(self): - if self.ua: - return f"{self.date}{self.ua}" - else: - return str(self.date) - - def _strict_date(self, lean): - if self.date == "open": - return None # depends on the other date - return dt_to_struct_time(date.today()) - if self.date == "unknown": - return None # depends on the other date - return self.date._strict_date(lean) - - def _get_fuzzy_padding(self, lean): - if not self.ua: - return relativedelta(0) - multiplier = self.ua._get_multiplier() - - if self.date.precision == PRECISION_DAY: - return multiplier * appsettings.PADDING_DAY_PRECISION - elif self.date.precision == PRECISION_MONTH: - return multiplier * appsettings.PADDING_MONTH_PRECISION - elif self.date.precision == PRECISION_YEAR: - return multiplier * appsettings.PADDING_YEAR_PRECISION - - -class Testi(EDTFObject): - # @classmethod - # def parse_action(cls, toks): - # args = toks.asList() - # return cls(*args) - - def __init__(self, **args): - print(args) - - -class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): - if sectionOpen: - self.is_open = True - self.is_unknown = False - else: - self.is_open = False - self.is_unknown = True - self.other = other_section_element - - def __str__(self): - if self.is_unknown: - return "" - else: - return ".." 
- - def _strict_date(self, lean): - if lean == EARLIEST: - if self.is_unknown: - upper = self.other._strict_date(LATEST) - return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) - else: - return dt_to_struct_time( - date.min - ) # from the beginning of time; *ahem, i mean python datetime - else: - if self.is_unknown: - lower = self.other._strict_date(EARLIEST) - return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) - else: - return dt_to_struct_time(date.max) # to then end of python datetime - - -class Unspecified(Date): - pass - - -class Level1Interval(Interval): - def __init__(self, lower=None, upper=None): - if lower: - if lower["date"] == "..": - self.lower = UnspecifiedIntervalSection( - True, UncertainOrApproximate(**upper) - ) - else: - self.lower = UncertainOrApproximate(**lower) - else: - self.lower = UnspecifiedIntervalSection( - False, UncertainOrApproximate(**upper) - ) - if upper: - if upper["date"] == "..": - self.upper = UnspecifiedIntervalSection( - True, UncertainOrApproximate(**lower) - ) - else: - self.upper = UncertainOrApproximate(**upper) - else: - self.upper = UnspecifiedIntervalSection( - False, UncertainOrApproximate(**lower) - ) - - def _get_fuzzy_padding(self, lean): - if lean == EARLIEST: - return self.lower._get_fuzzy_padding(lean) - elif lean == LATEST: - return self.upper._get_fuzzy_padding(lean) - - -class LongYear(EDTFObject): - def __init__(self, year): - self.year = year - - def __str__(self): - return f"Y{self.year}" - - def _precise_year(self): - return int(self.year) - - def _strict_date(self, lean): - py = self._precise_year() - if lean == EARLIEST: - return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - - -class Season(Date): - def __init__(self, year, season, **kwargs): - self.year = year - self.season = season # use season to look up month - # day isn't part of the 'season' spec, but it helps the inherited - # 
`Date` methods do their thing. - self.day = None - - def __str__(self): - return f"{self.year}-{self.season}" - - def _precise_month(self, lean): - rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] - if lean == EARLIEST: - return rng[0] - else: - return rng[1] - - -# (* ************************** Level 2 *************************** *) - - -class PartialUncertainOrApproximate(Date): - def set_year(self, y): # Year can be None. - self._year = y - - year = property(Date.get_year, set_year) - - def __init__( - self, - year=None, - month=None, - day=None, - year_ua=False, - month_ua=False, - day_ua=False, - year_month_ua=False, - month_day_ua=False, - ssn=None, - season_ua=False, - all_ua=False, - ): - self.year = year - self.month = month - self.day = day - - self.year_ua = year_ua - self.month_ua = month_ua - self.day_ua = day_ua - - self.year_month_ua = year_month_ua - self.month_day_ua = month_day_ua - - self.season = ssn - self.season_ua = season_ua - - self.all_ua = all_ua - - def __str__(self): - if self.season_ua: - return f"{self.season}{self.season_ua}" - - y = f"{self.year}{self.year_ua}" if self.year_ua else str(self.year) - - m = f"({self.month}){self.month_ua}" if self.month_ua else str(self.month) - - if self.day: - d = f"({self.day}){self.day_ua}" if self.day_ua else str(self.day) - else: - d = None - - if self.year_month_ua: # year/month approximate. No brackets needed. 
- ym = f"{y}-{m}{self.year_month_ua}" - result = f"{ym}-{d}" if d else ym - elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = f"{y}-{m}-{d}{self.month_day_ua}" - else: - result = f"{y}-({m}-{d}){self.month_day_ua}" - else: - result = f"{y}-{m}-{d}" if d else f"{y}-{m}" - - if self.all_ua: - result = f"({result}){self.all_ua}" - - return result - - def _precise_year(self, lean): - if self.season: - return self.season._precise_year(lean) - return super()._precise_year(lean) - - def _precise_month(self, lean): - if self.season: - return self.season._precise_month(lean) - return super()._precise_month(lean) - - def _precise_day(self, lean): - if self.season: - return self.season._precise_day(lean) - return super()._precise_day(lean) - - def _get_fuzzy_padding(self, lean): - """ - This is not a perfect interpretation as fuzziness is introduced for - redundant uncertainly modifiers e.g. (2006~)~ will get two sets of - fuzziness. - """ - result = relativedelta(0) - - if self.year_ua: - result += ( - appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() - ) - if self.month_ua: - result += ( - appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() - ) - if self.day_ua: - result += appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() - - if self.year_month_ua: - result += ( - appsettings.PADDING_YEAR_PRECISION - * self.year_month_ua._get_multiplier() - ) - result += ( - appsettings.PADDING_MONTH_PRECISION - * self.year_month_ua._get_multiplier() - ) - if self.month_day_ua: - result += ( - appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() - ) - result += ( - appsettings.PADDING_MONTH_PRECISION - * self.month_day_ua._get_multiplier() - ) - - if self.season_ua: - result += ( - appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() - ) - - if self.all_ua: - multiplier = self.all_ua._get_multiplier() - - if self.precision == PRECISION_DAY: - result 
+= multiplier * appsettings.PADDING_DAY_PRECISION - result += multiplier * appsettings.PADDING_MONTH_PRECISION - result += multiplier * appsettings.PADDING_YEAR_PRECISION - elif self.precision == PRECISION_MONTH: - result += multiplier * appsettings.PADDING_MONTH_PRECISION - result += multiplier * appsettings.PADDING_YEAR_PRECISION - elif self.precision == PRECISION_YEAR: - result += multiplier * appsettings.PADDING_YEAR_PRECISION - - return result - - -class PartialUnspecified(Unspecified): - pass - - -class Consecutives(Interval): - # Treating Consecutive ranges as intervals where one bound is optional - def __init__(self, lower=None, upper=None): - if lower and not isinstance(lower, EDTFObject): - self.lower = Date.parse(lower) - else: - self.lower = lower - - if upper and not isinstance(upper, EDTFObject): - self.upper = Date.parse(upper) - else: - self.upper = upper - - def __str__(self): - return "{}..{}".format(self.lower or "", self.upper or "") - - -class EarlierConsecutives(Consecutives): - pass - - -class LaterConsecutives(Consecutives): - pass - - -class OneOfASet(EDTFObject): - @classmethod - def parse_action(cls, toks): - args = [t for t in toks.asList() if isinstance(t, EDTFObject)] - return cls(*args) - - def __init__(self, *args): - self.objects = args - - def __str__(self): - return "[{}]".format(", ".join([str(o) for o in self.objects])) - - def _strict_date(self, lean): - if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) - - -class MultipleDates(EDTFObject): - @classmethod - def parse_action(cls, toks): - args = [t for t in toks.asList() if isinstance(t, EDTFObject)] - return cls(*args) - - def __init__(self, *args): - self.objects = args - - def __str__(self): - return "{{{}}}".format(", ".join([str(o) for o in self.objects])) - - def _strict_date(self, lean): - if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) - else: - 
return min([x._strict_date(lean) for x in self.objects]) - - -class MaskedPrecision(Date): - pass - - -class Level2Interval(Level1Interval): - def __init__(self, lower, upper): - # Check whether incoming lower/upper values are single-item lists, and - # if so take just the first item. This works around what I *think* is a - # bug in the grammer that provides us with single-item lists of - # `PartialUncertainOrApproximate` items for lower/upper values. - if isinstance(lower, (tuple, list)) and len(lower) == 1: - self.lower = lower[0] - else: - self.lower = lower - if isinstance(lower, (tuple, list)) and len(upper) == 1: - self.upper = upper[0] - else: - self.upper = upper - - -class ExponentialYear(LongYear): - def __init__(self, base, exponent, precision=None): - self.base = base - self.exponent = exponent - self.precision = precision - - def _precise_year(self): - return int(self.base) * 10 ** int(self.exponent) - - def get_year(self): - if self.precision: - return f"{self.base}E{self.exponent}S{self.precision}" - else: - return f"{self.base}E{self.exponent}" - - year = property(get_year) diff --git a/vagrant wheel install problems.txt b/vagrant wheel install problems.txt deleted file mode 100644 index 174f67e..0000000 --- a/vagrant wheel install problems.txt +++ /dev/null @@ -1,5 +0,0 @@ -vagrant wheel install problems -https://stackoverflow.com/questions/56851961/how-to-fix-no-such-file-or-directory-error-in-setuptools-wheel-py157-convert - -from that link: -So it turns out that this problem was being caused by lag in Vagrant/Virtualbox's synced folders. I was trying to build the Python project inside a Vagrant VM shared from the host file system using a synced folder. Copying the project out of the synced folder into another folder in the VM allows it to build. Another dirty hack that worked was to add a time.sleep(1) in the setuptools/wheel.py source file on line 157 before the os.rename that was causing the OS Exception to be raised. 
This gives the file system a chance to sync, and therefore works around the issue. \ No newline at end of file From 9f2b55066beb18de154f6ca5d62b7f0474e37740 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:37:48 -0400 Subject: [PATCH 017/135] Fix matrix --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be0326d..34a2001 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,8 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.12"] - # python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] defaults: run: working-directory: . From 0eeb9bdce49f6f50c4c968a60627c8d50d1e174c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 22 May 2024 18:52:47 -0400 Subject: [PATCH 018/135] Try adding permissions --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34a2001..fb06083 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,11 @@ on: workflow_dispatch: pull_request: +permissions: + contents: write + pull-requests: write + + jobs: python-unit: runs-on: ubuntu-latest From 7a02fcd8403277f31ea7a0e3e742a68b0733e704 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 09:43:40 +1000 Subject: [PATCH 019/135] Try adding checks: permission too --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb06083..22590c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,7 @@ on: pull_request: permissions: + checks: write contents: write pull-requests: write From 54d5ec7cee810cc87ef42d94a94c269abe11a157 Mon Sep 17 00:00:00 
2001 From: Alastair Weakley Date: Thu, 23 May 2024 09:46:38 +1000 Subject: [PATCH 020/135] Run CI on push too --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 22590c3..9cbb841 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,8 +1,9 @@ name: CI on: - workflow_dispatch: pull_request: + push: + workflow_dispatch: permissions: checks: write From 0b92096dacc2d2047346ad6e59e5fc829fd1250a Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 09:54:48 +1000 Subject: [PATCH 021/135] Add id to Coverage Comment step --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9cbb841..07420a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,6 +60,7 @@ jobs: coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" - name: Pytest coverage comment + id: coverageComment uses: MishaKav/pytest-coverage-comment@main with: pytest-xml-coverage-path: ./coverage_combined.xml From 642819638506ae1c1ad06527c94706a1ccbfebe5 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 10:14:01 +1000 Subject: [PATCH 022/135] Remove html from printed output --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07420a0..1618350 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,7 +71,6 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" - echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors 
}}" From b8fdbef35aac415ee35251e561afdf33f3dc2a80 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 10:22:20 +1000 Subject: [PATCH 023/135] Remove not-success from printed output --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1618350..767ebf0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,6 +71,7 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" + # echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" @@ -78,4 +79,4 @@ jobs: echo "Coverage Skipped - ${{ steps.coverageComment.outputs.skipped }}" echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" - echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" + # echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" From f262199f120b49d13c48e4110c4d56929b9d99fb Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 23 May 2024 10:28:44 +1000 Subject: [PATCH 024/135] Remove all junit-xml items from printed report (for now at least..) 
--- .github/workflows/ci.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 767ebf0..d5416ed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,12 +71,4 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" - # echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" - echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" - echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" - echo "Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" - echo "Coverage Skipped - ${{ steps.coverageComment.outputs.skipped }}" - echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" - echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" - # echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" From 5d3d80c1c7c5c550808557c8e7dfb4b7682351b0 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 11:25:54 -0400 Subject: [PATCH 025/135] Add back JUnit reporting stats - Switch to using pytest-django to run the Django tests, as that has JUnit support. Add Django settings as a flag rather than in pyproject.toml because defining it there makes the normal pytest run fail since it can't find the module. - Adds a simple script using junitparser to combine the two JUnit XML files. 
--- .github/workflows/ci.yml | 17 +++++++++++++++-- .gitignore | 4 ++++ combine_junit.py | 23 +++++++++++++++++++++++ pyproject.toml | 6 ++++-- 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 combine_junit.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d5416ed..cb9d7f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,13 +44,13 @@ jobs: - name: Run unit tests run: | - pytest + pytest --junitxml=junit_pytest_main.xml mv .coverage .coverage_main - name: Run Django integration tests working-directory: ./edtf_django_tests run: | - coverage run manage.py test edtf_integration + pytest edtf_integration/tests.py --ds=edtf_django_tests.settings --junitxml=../junit_pytest_django.xml mv .coverage ../.coverage_django - name: Combine coverage reports @@ -59,11 +59,16 @@ jobs: coverage report --omit="edtf_django_tests/*" coverage xml -o coverage_combined.xml --omit="edtf_django_tests/*" + - name: Combine JUnit XML reports + run: | + python combine_junit.py combined_junit_pytest.xml junit_pytest_main.xml junit_pytest_django.xml + - name: Pytest coverage comment id: coverageComment uses: MishaKav/pytest-coverage-comment@main with: pytest-xml-coverage-path: ./coverage_combined.xml + junitxml-path: ./combined_junit_pytest.xml unique-id-for-comment: ${{ matrix.python-version }} github-token: ${{ secrets.GITHUB_TOKEN }} @@ -71,4 +76,12 @@ jobs: run: | echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" + echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" + echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" + echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" + echo "Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" + echo "Coverage Skipped - ${{ 
steps.coverageComment.outputs.skipped }}" + echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" + echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" + echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" diff --git a/.gitignore b/.gitignore index 182cf8b..36df893 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,10 @@ coverage_combined.xml .coverage_main .coverage_django *,cover +combined_junit_pytest.xml +pytest.xml +junit_pytest_main.xml +junit_pytest_django.xml # Translations *.mo diff --git a/combine_junit.py b/combine_junit.py new file mode 100644 index 0000000..5e3a05b --- /dev/null +++ b/combine_junit.py @@ -0,0 +1,23 @@ +import sys + +from junitparser import JUnitXml + + +def combine_junit_xml(output_file, *input_files): + combined_xml = JUnitXml() + for input_file in input_files: + xml = JUnitXml.fromfile(input_file) + combined_xml.extend(xml) + combined_xml.write(output_file) + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print( + "Usage: python combine_junit_xml.py ... 
" + ) + sys.exit(1) + + output_file = sys.argv[1] + input_files = sys.argv[2:] + combine_junit_xml(output_file, *input_files) diff --git a/pyproject.toml b/pyproject.toml index 8dea9fd..64579ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,10 +38,12 @@ classifiers = [ test = [ "django>=4.2,<5.0", "pytest", + "pytest-django", "ruff", "pre-commit", "coverage", - "pytest-cov" + "pytest-cov", + "junitparser", ] [project.urls] @@ -79,7 +81,7 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/ --cov=edtf --cov-report=xml" +addopts = "--ignore=edtf_django_tests/ --cov=edtf" plugins = ["pytest_cov"] [tool.coverage.run] From 6771172126ca4e9fb84beb7b4d7f60724bf3434b Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 11:48:38 -0400 Subject: [PATCH 026/135] Skip covered due to long comments ``` File read successfully "/home/runner/work/python-edtf/python-edtf/./combined_junit_pytest.xml" Warning: Your comment is too long (maximum is 65536 characters), coverage report will not be added. 
Warning: Try add: "--cov-report=term-missing:skip-covered", or add "hide-report: true", or add "report-only-changed-files: true", or switch to "multiple-files" mode ``` --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb9d7f3..09b9c65 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,13 +44,13 @@ jobs: - name: Run unit tests run: | - pytest --junitxml=junit_pytest_main.xml + pytest --junitxml=junit_pytest_main.xml --cov-report=term-missing:skip-covered mv .coverage .coverage_main - name: Run Django integration tests working-directory: ./edtf_django_tests run: | - pytest edtf_integration/tests.py --ds=edtf_django_tests.settings --junitxml=../junit_pytest_django.xml + pytest edtf_integration/tests.py --ds=edtf_django_tests.settings --junitxml=../junit_pytest_django.xml --cov-report=term-missing:skip-covered mv .coverage ../.coverage_django - name: Combine coverage reports From 7e15e8909b528b5e5979a52f47c9ba692d041030 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Fri, 24 May 2024 11:28:07 +1000 Subject: [PATCH 027/135] Summary report comes with its own quotes #53 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09b9c65..95c29c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,7 +77,7 @@ jobs: echo "Coverage Percentage - ${{ steps.coverageComment.outputs.coverage }}" echo "Coverage Color - ${{ steps.coverageComment.outputs.color }}" echo "Coverage Html - ${{ steps.coverageComment.outputs.coverageHtml }}" - echo "Summary Report - ${{ steps.coverageComment.outputs.summaryReport }}" + echo "Summary Report -" ${{ steps.coverageComment.outputs.summaryReport }} echo "Coverage Warnings - ${{ steps.coverageComment.outputs.warnings }}" echo "Coverage Errors - ${{ steps.coverageComment.outputs.errors }}" echo 
"Coverage Failures - ${{ steps.coverageComment.outputs.failures }}" From 7556479586ab46ea8641ea74d827ffeba0f0c063 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 12:03:48 -0400 Subject: [PATCH 028/135] Finish removing masked precision --- README.md | 6 ------ edtf/parser/grammar.py | 4 ---- edtf/parser/parser_classes.py | 4 ---- 3 files changed, 14 deletions(-) diff --git a/README.md b/README.md index c4f172e..fc9fe75 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,6 @@ The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_c PartialUnspecified OneOfASet MultipleDates - MaskedPrecision Level2Interval Level2Season ExponentialYear @@ -158,11 +157,6 @@ Test coverage includes every example given in the spec table of features. >>> parse_edtf('{1667,1668, 1670..1672}') MultipleDates: '{1667, 1668, 1670..1672}' -* Masked precision: - - >>> parse_edtf('197x') # A date in the 1970s. - MaskedPrecision: '197x' - * Level 2 Extended intervals: >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 730f47d..0eb2e9c 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -260,10 +260,6 @@ def f(toks): ) Level2Interval.set_parser(level2Interval) -# (* ** Masked precision ** *) eliminated in latest specs -# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year") -# MaskedPrecision.set_parser(maskedPrecision) - # (* ** Inclusive list and choice list** *) consecutives = ( (yearMonthDay("lower") + ".." 
+ yearMonthDay("upper")) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index bb9a213..b31ffeb 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -806,10 +806,6 @@ def _strict_date(self, lean): return min([x._strict_date(lean) for x in self.objects]) -class MaskedPrecision(Date): - pass - - class Level2Interval(Level1Interval): def __init__(self, lower, upper): # Check whether incoming lower/upper values are single-item lists, and From 3ce6e875f79d496e61f9627d5e6887853d668db6 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 12:28:04 -0400 Subject: [PATCH 029/135] Add all required tests for significant digits Significant digits should work on a year in any format: "four-digit, 'Y' prefix, or exponential." These correspond to the python-edtf classes of Date, LongYear, and ExponentialYear. --- edtf/parser/tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 8d9a770..6e0a8a1 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -193,8 +193,14 @@ # the year -170000000 ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits + # Some year between 1900 and 1999, estimated to be 1950 + ("1950S2", ("1900-01-01", "1999-12-31")), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
- # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), + ("Y17101E4S3", ("171000000-01-01", "171999999-12-31")), + # Some year between 338000 and 338999, estimated to be 338800 + ("Y3388E2S3", ("338000-01-01", "338999-12-31")), + # some year between 171000000 and 171999999 estimated to be 171010000 + ("Y171010000S3", ("171010000-01-01", "171999999-12-31")), # L2 Seasons # Spring southern hemisphere, 2001 ("2001-29", ("2001-09-01", "2001-11-30")), From 7545b6a56127bea3459ead8dbc5e28f502014dd9 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 13:37:49 -0400 Subject: [PATCH 030/135] Parse significant digits for year, Y-prefixed, exponential - Add significant digit parsing for Date (year) and LongYear (y-prefixed) - Standardize grammar for significant digits - Use significant_digits rather than precision. Precision is used throughout for other functionality. - Add estimated() public functions for the above EDTF classes --- edtf/parser/grammar.py | 21 ++++++++++++++------- edtf/parser/parser_classes.py | 32 +++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 0eb2e9c..15947d0 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -48,8 +48,9 @@ oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) -positiveDigit = Word(nums, exact=1, excludeChars="0") digit = Word(nums, exact=1) +positiveDigit = Word(nums, exact=1, excludeChars="0") +positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) second = zeroThru59 minute = zeroThru59 @@ -63,13 +64,16 @@ ^ (L("02")("month") + "-" + oneThru29("day")) ) +# Significant digits suffix +significantDigits = "S" + Word(nums)("significant_digits") + # 4 digits, 0 to 9 positiveYear = Word(nums, exact=4) # Negative version of positive year, but "-0000" is illegal negativeYear = NotAny(L("-0000")) + 
("-" + positiveYear) -year = Combine(positiveYear ^ negativeYear)("year") +year = Combine(positiveYear ^ negativeYear)("year") + Optional(significantDigits) yearMonth = year + "-" + month yearMonthDay = year + "-" + monthDay # o hai iso date @@ -112,9 +116,13 @@ # (* *** Long Year - Simple Form *** *) -longYearSimple = "Y" + Combine( - Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit) -)("year") +longYearSimple = ( + "Y" + + Combine(Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit))( + "year" + ) + + Optional(significantDigits) +) LongYear.set_parser(longYearSimple) # (* *** L1Interval *** *) @@ -238,13 +246,12 @@ def f(toks): seasonQualified = season + "^" + seasonQualifier # (* ** Long Year - Scientific Form ** *) -positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) longYearScientific = ( "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + positiveInteger("exponent") - + Optional("S" + positiveInteger("precision")) + + Optional(significantDigits) ) ExponentialYear.set_parser(longYearScientific) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index b31ffeb..09140b6 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -261,7 +261,9 @@ def get_month(self): month = property(get_month, set_month) - def __init__(self, year=None, month=None, day=None, **kwargs): + def __init__( + self, year=None, month=None, day=None, significant_digits=None, **kwargs + ): for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) @@ -270,6 +272,7 @@ def __init__(self, year=None, month=None, day=None, **kwargs): self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day + self.significant_digits = significant_digits def __str__(self): r = self.year @@ -277,6 +280,8 @@ def __str__(self): r += f"-{self.month}" if self.day: r += f"-{self.day}" + if self.significant_digits: + r += f"S{self.significant_digits}" return r def isoformat(self, default=date.max): @@ -337,6 +342,9 @@ def precision(self): return PRECISION_MONTH return PRECISION_YEAR + def estimated(self): + return self._precise_year(EARLIEST) + class DateAndTime(EDTFObject): def __init__(self, date, time): @@ -537,11 +545,15 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): - def __init__(self, year): + def __init__(self, year, significant_digits=None): self.year = year + self.significant_digits = significant_digits def __str__(self): - return f"Y{self.year}" + if self.significant_digits: + return f"Y{self.year}S{self.significant_digits}" + else: + return f"Y{self.year}" def _precise_year(self): return int(self.year) @@ -553,6 +565,9 @@ def _strict_date(self, lean): else: return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + def estimated(self): + return self._precise_year() + class Season(Date): def __init__(self, year, season, **kwargs): @@ -827,18 +842,21 @@ class Level2Season(Season): class ExponentialYear(LongYear): - def __init__(self, base, exponent, precision=None): + def __init__(self, base, exponent, significant_digits=None): self.base = base self.exponent = exponent - self.precision = precision + self.significant_digits = significant_digits def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) def get_year(self): - if self.precision: - return f"{self.base}E{self.exponent}S{self.precision}" + if self.significant_digits: + return f"{self.base}E{self.exponent}S{self.significant_digits}" else: return f"{self.base}E{self.exponent}" year = property(get_year) + + def estimated(self): + return self._precise_year() From 6b3a9d46d10fddd1941b900c610fdcee579b97fe Mon Sep 17 00:00:00 2001 
From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 23 May 2024 15:03:32 -0400 Subject: [PATCH 031/135] Fix a regression with Consecutives / OneOfASet Two tests were failing: ``` FAILED edtf/parser/tests.py::test_edtf_examples[[1667, 1668, 1670..1672]-expected_tuple62] - AttributeError: 'list' object has no attribute 'expandtabs' FAILED edtf/parser/tests.py::test_edtf_examples[{1667,1668, 1670..1672}-expected_tuple67] - AttributeError: 'list' object has no attribute 'expandtabs' ``` pyparsing.parse_string() was being passed a list by year somehow. Added year_basic for this use case (4 digit year without significant digits). If we need to support Consecutives with significant digits then this isn't a sufficient workaround. --- edtf/parser/grammar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 15947d0..e6232c4 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -74,6 +74,8 @@ negativeYear = NotAny(L("-0000")) + ("-" + positiveYear) year = Combine(positiveYear ^ negativeYear)("year") + Optional(significantDigits) +# simple version for Consecutives +year_basic = Combine(positiveYear ^ negativeYear)("year") yearMonth = year + "-" + month yearMonthDay = year + "-" + monthDay # o hai iso date @@ -271,7 +273,9 @@ def f(toks): consecutives = ( (yearMonthDay("lower") + ".." + yearMonthDay("upper")) ^ (yearMonth("lower") + ".." + yearMonth("upper")) - ^ (year("lower") + ".." + year("upper")) + ^ ( + year_basic("lower") + ".." 
+ year_basic("upper") + ) # using year_basic because some tests were throwing `'list' object has no attribute 'expandtabs'` - somewhere, pyparsing.parse_string() was being passed a list ) Consecutives.set_parser(consecutives) From 5883f539e7523fbabc7eb5075a1a4d279c2a8333 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Fri, 24 May 2024 15:58:57 -0400 Subject: [PATCH 032/135] Significant digits updates - Adds functionality for significant digits to Date, LongYear, and ExponentialYear - Updates the tests for significant digits - Updates the docs for significant digits and a few other references to old syntax (lowercase e, grouping) - ExponentialYear inherits from LongYear so only need to add it there; LongYear does not inherit from Date, so a bit of code duplication in the _fuzzy() overrides --- README.md | 42 +++++++++++++++---- edtf/parser/parser_classes.py | 78 +++++++++++++++++++++++++++++++++-- edtf/parser/tests.py | 20 ++++++--- 3 files changed, 124 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index fc9fe75..449912c 100644 --- a/README.md +++ b/README.md @@ -138,9 +138,8 @@ Test coverage includes every example given in the spec table of features. * Partial uncertain/approximate: - >>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate. - # Note that the result text is normalized - PartialUncertainOrApproximate: '2011-(06-04)~' + >>> parse_edtf('2004-06~-11') # year certain, month/day approximate. + PartialUncertainOrApproximate: '2004-06~-11' * Partial unspecified: @@ -159,13 +158,42 @@ Test coverage includes every example given in the spec table of features. 
* Level 2 Extended intervals: - >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') - Level2Interval: '2004-06-(01)~/2004-06-(20)~' + >>> parse_edtf('2004-06-~01/2004-06-~20') + Level2Interval: '2004-06-~01/2004-06-~20' * Year requiring more than 4 digits - exponential form: - >>> parse_edtf('Y-17e7') - ExponentialYear: 'Y-17e7' + >>> e = parse_edtf('Y-17E7') + ExponentialYear: 'Y-17E7' + >>> e.estimated() + -170000000 + +* Significant digits: + # '1950S2': some year between 1900 and 1999, estimated to be 1950 + >>> d = parse_edtf('1950S2') + Date: '1950S2' + >>> d.lower_fuzzy()[:3] + (1900, 1, 1) + >>> d.upper_fuzzy()[:3] + (1999, 12, 31) + # 'Y171010000S3': some year between some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. + >>> l = parse_edtf('Y171010000S3') + LongYear: 'Y171010000S3' + >>> l.estimated() + 171010000 + >>> l.lower_fuzzy()[:3] + (171000000, 1, 1) + >>> l.upper_fuzzy()[:3] + (171999999, 12, 31) + # 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 + >>> e = parse_edtf('Y3388E2S3') + ExponentialYear: 'Y3388E2S3S3' + >>> e.estimated() + 338800 + >>> e.lower_fuzzy()[:3] + (338000, 1, 1) + >>> e.upper_fuzzy()[:3] + (338999, 12, 31) ### Natural language representation diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 09140b6..e12ecbd 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -272,7 +272,9 @@ def __init__( self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def __str__(self): r = self.year @@ -291,6 +293,36 @@ def isoformat(self, default=date.max): int(self.day or default.day), ) + def lower_fuzzy(self): + if not hasattr(self, "significant_digits") or not self.significant_digits: + return apply_delta( + sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) + ) + else: + total_digits = len(self.year) + insignificant_digits = total_digits - self.significant_digits + lower_year = ( + int(self.year) + // (10**insignificant_digits) + * (10**insignificant_digits) + ) + return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def upper_fuzzy(self): + if not hasattr(self, "significant_digits") or not self.significant_digits: + return apply_delta( + add, self.upper_strict(), self._get_fuzzy_padding(LATEST) + ) + else: + total_digits = len(self.year) + insignificant_digits = total_digits - self.significant_digits + upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( + 10**insignificant_digits + ) - 1 + return struct_time( + [upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS + ) + def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: @@ -547,7 +579,9 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): def __init__(self, year, significant_digits=None): self.year = year - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def __str__(self): if self.significant_digits: @@ -568,6 +602,42 @@ def _strict_date(self, lean): def estimated(self): return self._precise_year() + def lower_fuzzy(self): + full_year = self._precise_year() + strict_val = self.lower_strict() + if not self.significant_digits: + return apply_delta(sub, strict_val, 
self._get_fuzzy_padding(EARLIEST)) + else: + insignificant_digits = len(str(full_year)) - int(self.significant_digits) + if insignificant_digits <= 0: + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + lower_year = sig_digits * padding_value + return apply_delta( + sub, + struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(EARLIEST), + ) + + def upper_fuzzy(self): + full_year = self._precise_year() + strict_val = self.upper_strict() + if not self.significant_digits: + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + else: + insignificant_digits = len(str(full_year)) - self.significant_digits + if insignificant_digits <= 0: + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + upper_year = (sig_digits + 1) * padding_value - 1 + return apply_delta( + add, + struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(LATEST), + ) + class Season(Date): def __init__(self, year, season, **kwargs): @@ -845,7 +915,9 @@ class ExponentialYear(LongYear): def __init__(self, base, exponent, significant_digits=None): self.base = base self.exponent = exponent - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 6e0a8a1..1ec7452 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -14,8 +14,8 @@ # where the first value is a tuple, the second item is a tuple of the normalised parse result. # # The values in the second tuple indicate the iso versions of the derived Python `date`s. 
-# - If there's one other value, all the derived dates should be the same. -# - If there're two other values, then all the lower values should be the same +# - If there is one other value, all the derived dates should be the same. +# - If there are two other values, then all the lower values should be the same # and all the upper values should be the same. # - If there are three other values, then the upper and lower ``_strict`` values # should be the first value, and the upper and lower ``_fuzzy`` values should be @@ -194,13 +194,21 @@ ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits # Some year between 1900 and 1999, estimated to be 1950 - ("1950S2", ("1900-01-01", "1999-12-31")), + ("1950S2", ("1950-01-01", "1950-12-31", "1900-01-01", "1999-12-31")), + ("1953S2", ("1953-01-01", "1953-12-31", "1900-01-01", "1999-12-31")), + ("1953S3", ("1953-01-01", "1953-12-31", "1950-01-01", "1959-12-31")), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
- ("Y17101E4S3", ("171000000-01-01", "171999999-12-31")), + ( + "Y17101E4S3", + ("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"), + ), # Some year between 338000 and 338999, estimated to be 338800 - ("Y3388E2S3", ("338000-01-01", "338999-12-31")), + ("Y3388E2S3", ("338800-01-01", "338800-12-31", "338000-01-01", "338999-12-31")), # some year between 171000000 and 171999999 estimated to be 171010000 - ("Y171010000S3", ("171010000-01-01", "171999999-12-31")), + ( + "Y171010000S3", + ("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"), + ), # L2 Seasons # Spring southern hemisphere, 2001 ("2001-29", ("2001-09-01", "2001-11-30")), From a6c869e5f32a7ae93fdcae5fd87172d2d4a8f28d Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 11:36:37 +1000 Subject: [PATCH 033/135] Minor fix to README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 449912c..a571813 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,7 @@ Test coverage includes every example given in the spec table of features. -170000000 * Significant digits: + # '1950S2': some year between 1900 and 1999, estimated to be 1950 >>> d = parse_edtf('1950S2') Date: '1950S2' @@ -176,7 +177,7 @@ Test coverage includes every example given in the spec table of features. (1900, 1, 1) >>> d.upper_fuzzy()[:3] (1999, 12, 31) - # 'Y171010000S3': some year between some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. + # 'Y171010000S3': some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. >>> l = parse_edtf('Y171010000S3') LongYear: 'Y171010000S3' >>> l.estimated() From 3a1f4368635c16f8929473db2f75b2071692a00a Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 11:44:24 +1000 Subject: [PATCH 034/135] Add limited benchmark tests No CI yet. 
#50 --- README.md | 1 + edtf/natlang/tests.py | 24 ++++++++++++++++++++++++ edtf/parser/grammar.py | 9 +++++++++ edtf/parser/tests.py | 21 +++++++++++++++++++++ pyproject.toml | 8 ++++++-- 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a571813..76476c5 100644 --- a/README.md +++ b/README.md @@ -401,6 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` +- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 78ecbc9..d2c43a5 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -185,3 +185,27 @@ def test_natlang(input_text, expected_output): assert ( result == expected_output ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + "input_text,expected_output", + [ + ("23rd Dynasty", None), + ("January 2008", "2008-01"), + ("ca1860", "1860~"), + ("uncertain: approx 1862", "1862%"), + ("January", "XXXX-01"), + ("Winter 1872", "1872-24"), + ("before approx January 18 1928", "/1928-01-18~"), + ("birthday in 1872", "1872"), + ("1270 CE", "1270"), + ("2nd century bce", "-01XX"), + ("1858/1860", "[1858, 1860]"), + ], +) +def test_benchmark_natlang(benchmark, input_text, expected_output): + """ + Benchmark selected natural language conversions + """ + benchmark(text_to_edtf, input_text) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index e6232c4..1747341 100644 --- a/edtf/parser/grammar.py +++ 
b/edtf/parser/grammar.py @@ -1,3 +1,11 @@ +# ruff: noqa: E402 I001 + +# It's recommended to `enablePackrat()` immediately after importing pyparsing +# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips +import pyparsing + +pyparsing.ParserElement.enablePackrat() + from pyparsing import ( Combine, NotAny, @@ -13,6 +21,7 @@ ) from pyparsing import Literal as L + from edtf.parser.edtf_exceptions import EDTFParseException # (* ************************** Level 0 *************************** *) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 1ec7452..4932e95 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -216,6 +216,20 @@ ("2001-34", ("2001-04-01", "2001-06-30")), ) +BENCHMARK_EXAMPLES = ( + "2001-02-03", + "2008-12", + "2008", + "-0999", + "2004-01-01T10:10:10+05:00", + "-2005/-1999-02", + "/2006", + "?2004-%06", + "[1667, 1760-12]", + "Y3388E2S3", + "2001-29", +) + BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, @@ -340,3 +354,10 @@ def test_comparisons(): assert d4 == d5 assert d1 < d5 assert d1 > d6 + + +@pytest.mark.benchmark +@pytest.mark.parametrize("test_input", BENCHMARK_EXAMPLES) +def test_benchmark_parser(benchmark, test_input): + """Benchmark parsing of selected EDTF strings.""" + benchmark(parse, test_input) diff --git a/pyproject.toml b/pyproject.toml index 64579ae..56978fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ test = [ "django>=4.2,<5.0", "pytest", "pytest-django", + "pytest-benchmark", "ruff", "pre-commit", "coverage", @@ -81,8 +82,11 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/ --cov=edtf" -plugins = ["pytest_cov"] +markers = [ + "benchmark: mark a test as a benchmark", +] +addopts = "--ignore=edtf_django_tests/ --cov=edtf -m 'not benchmark'" +plugins = ["pytest_cov", "pytest_benchmark"] 
[tool.coverage.run] # we run the edtf_integration tests but only care about them testing fields.py in the main package From 6e7b1093a43cd70906a7402a01621a0f1a195b3b Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 13:00:34 +1000 Subject: [PATCH 035/135] Add benchmark to CI #50 --- .github/workflows/ci.yml | 18 ++++++++++++++++++ dev-requirements.txt | 2 ++ edtf/parser/grammar.py | 5 +++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 95c29c6..39d0f4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,8 @@ on: permissions: checks: write contents: write + # deployments permission to deploy GitHub pages website + deployments: write pull-requests: write @@ -85,3 +87,19 @@ jobs: echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" + + - name: Run benchmarks + run: | + pytest -m benchmark --benchmark-json=./output.json + + - name: Publish benchmark results + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: 'pytest' + auto-push: false + output-file-path: output.json + github-token: ${{ secrets.GITHUB_TOKEN }} + comment-on-alert: true + save-data-file: false + skip-fetch-gh-pages: true + summary-always: true diff --git a/dev-requirements.txt b/dev-requirements.txt index 1e37df5..19242af 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,7 @@ -r requirements.txt # Include all main requirements django>=4.2,<5.0 pytest +pytest-benchmark +pytest-django ruff pre-commit diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 1747341..9840bde 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -2,9 +2,10 @@ # It's recommended to `enablePackrat()` immediately after importing pyparsing # 
https://github.com/pyparsing/pyparsing/wiki/Performance-Tips -import pyparsing -pyparsing.ParserElement.enablePackrat() +# TODO: uncomment this once benchmark testing has run once in CI +# import pyparsing +# pyparsing.ParserElement.enablePackrat() from pyparsing import ( Combine, From 0ab80edfc0d0016490765b27f145e87332a22b42 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 14:12:38 +1000 Subject: [PATCH 036/135] Prevent gh-pages push --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39d0f4e..f30ea57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,9 +97,9 @@ jobs: with: tool: 'pytest' auto-push: false + comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} comment-on-alert: true save-data-file: false - skip-fetch-gh-pages: true summary-always: true From 34363577027222d6ce94a92e0dc10a8935f01d44 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 14:45:34 +1000 Subject: [PATCH 037/135] Add gh-pages push --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f30ea57..ec93df0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,12 +94,13 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 + if: github.event_name != 'pull_request' with: tool: 'pytest' - auto-push: false + auto-push: true comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} comment-on-alert: true - save-data-file: false + save-data-file: true summary-always: true From 23a3d7e1de070bb0156e06d5ac7a91cf081d00e6 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:34:42 +1000 Subject: [PATCH 038/135] Make 2 CI paths #50 --- .github/workflows/ci.yml | 14 +++++++++++++- 1 file 
changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec93df0..370258a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,6 +101,18 @@ jobs: comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} - comment-on-alert: true + comment-on-alert: false + save-data-file: true + summary-always: true + + - name: Publish benchmark results + uses: benchmark-action/github-action-benchmark@v1 + if: github.event_name == 'pull_request' + with: + tool: 'pytest' + auto-push: false + comment-always: true + output-file-path: output.json + comment-on-alert: false save-data-file: true summary-always: true From bb6e64052487511a23e256db10ca74308dd5c11b Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:39:25 +1000 Subject: [PATCH 039/135] Store/retrieve previous results --- .github/workflows/ci.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 370258a..fefb0c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,6 +92,12 @@ jobs: run: | pytest -m benchmark --benchmark-json=./output.json + - name: Download previous benchmark data + uses: actions/cache@v4 + with: + path: ./cache + key: ${{ runner.os }}-benchmark + - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 if: github.event_name != 'pull_request' @@ -101,11 +107,13 @@ jobs: comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} - comment-on-alert: false + comment-on-alert: true save-data-file: true summary-always: true + # Where the previous data file is stored + external-data-json-path: ./cache/benchmark-data.json - - name: Publish benchmark results + - name: Comment on benchmark results without publishing uses: benchmark-action/github-action-benchmark@v1 if: github.event_name == 'pull_request' with: @@ 
-116,3 +124,4 @@ jobs: comment-on-alert: false save-data-file: true summary-always: true + external-data-json-path: ./cache/benchmark-data.json From 13a8315234dae048461e8b2bd53b840f0bea8e12 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:44:12 +1000 Subject: [PATCH 040/135] Do not auto-push when using external-data file --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fefb0c2..a13671e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -110,12 +110,9 @@ jobs: comment-on-alert: true save-data-file: true summary-always: true - # Where the previous data file is stored - external-data-json-path: ./cache/benchmark-data.json - name: Comment on benchmark results without publishing uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name == 'pull_request' with: tool: 'pytest' auto-push: false From 57af55917d8baba8c334ab2bf7c0bce0d465d0ed Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:47:14 +1000 Subject: [PATCH 041/135] GH token required for comment-always --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a13671e..0f97b3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -116,6 +116,7 @@ jobs: with: tool: 'pytest' auto-push: false + github-token: ${{ secrets.GITHUB_TOKEN }} comment-always: true output-file-path: output.json comment-on-alert: false From 90558b6bede78d310755e303328745ad4c70c087 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 16:16:32 +1000 Subject: [PATCH 042/135] Activate packrat #50 --- edtf/parser/grammar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 9840bde..dc0f66d 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -3,9 +3,9 @@ # It's recommended to 
`enablePackrat()` immediately after importing pyparsing # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips -# TODO: uncomment this once benchmark testing has run once in CI -# import pyparsing -# pyparsing.ParserElement.enablePackrat() +import pyparsing + +pyparsing.ParserElement.enablePackrat() from pyparsing import ( Combine, From 6c0e23990a259e2bd66f41781d950940e015e379 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 16:30:08 +1000 Subject: [PATCH 043/135] Include benchmark url --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 76476c5..9fc6ede 100644 --- a/README.md +++ b/README.md @@ -401,7 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` -- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks +- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here]( https://ixc.github.io/python-edtf/dev/bench/)) - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` From ef24bc71dbd5d9d8edae57f0cc1aea182c88f12a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 27 May 2024 21:32:43 -0400 Subject: [PATCH 044/135] Handle unspecified and qualified ("16XX~") Unspecified dates previously could not handle qualification. Unspecified dates also couldn't handle dates with 3 unspecified digits ("1XXX"). This commit adds both those features and tests for those use cases. 
--- edtf/appsettings.py | 7 +++ edtf/parser/grammar.py | 8 ++-- edtf/parser/parser_classes.py | 84 ++++++++++++++++++++++++++++++++++- edtf/parser/tests.py | 7 +++ 4 files changed, 102 insertions(+), 4 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index e1bc821..e00a223 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -87,6 +87,13 @@ PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) +PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10)) +PADDING_CENTURY_PRECISION = EDTF.get( + "PADDING_CENTURY_PRECISION", relativedelta(years=100) +) +PADDING_MILLENNIUM_PRECISION = EDTF.get( + "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000) +) MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index dc0f66d..ae03251 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -161,17 +161,19 @@ def f(toks): Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") +yearWithOneOrTwoOrThreeUnspecifedDigits = Combine( + digit + (digit ^ "X") + (digit ^ "X") + "X" +)("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") unspecified = ( - yearWithOneOrTwoUnspecifedDigits + yearWithOneOrTwoOrThreeUnspecifedDigits ^ monthUnspecified ^ dayUnspecified ^ dayAndMonthUnspecified -) +) + Optional(UASymbol)("ua") Unspecified.set_parser(unspecified) # (* *** 
uncertainOrApproxDate *** *) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index e12ecbd..0bbf855 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -541,7 +541,89 @@ def precision(self): class Unspecified(Date): - pass + def __init__( + self, + year=None, + month=None, + day=None, + significant_digits=None, + ua=None, + **kwargs, + ): + for param in ("date", "lower", "upper"): + if param in kwargs: + self.__init__(**kwargs[param]) + return + self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.month = month + self.day = day + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) + self.ua = ua if ua else None + + def __str__(self): + r = self.year + if self.month: + r += f"-{self.month}" + if self.day: + r += f"-{self.day}" + if self.ua: + r += str(self.ua) + return r + + def _get_fuzzy_padding(self, lean): + if not self.ua: + return relativedelta() + multiplier = self.ua._get_multiplier() + padding = relativedelta() + + if self.year: + if self.precision == PRECISION_MILLENIUM: + padding += relativedelta( + years=int( + multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years + ) + ) + elif self.precision == PRECISION_CENTURY: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) + ) + elif self.precision == PRECISION_DECADE: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) + ) + else: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + if self.month: + padding += relativedelta( + months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) + ) + if self.day: + padding += relativedelta( + days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) + ) + + return padding + + @property + def precision(self): + if self.day: + return PRECISION_DAY + if self.month: + return PRECISION_MONTH + 
if self.year: + if self.year.isdigit(): + return PRECISION_YEAR + if len(self.year) == 4 and self.year.endswith("XXX"): + return PRECISION_MILLENIUM + if len(self.year) == 4 and self.year.endswith("XX"): + return PRECISION_CENTURY + if len(self.year) == 4 and self.year.endswith("X"): + return PRECISION_DECADE + raise ValueError(f"Unspecified date {self} has no precision") class Level1Interval(Interval): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4932e95..464aca3 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -85,6 +85,13 @@ ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), + # Unspecified and qualified + # "circa 17th century" + ("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")), + ("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")), + ("1XXX", ("1000-01-01", "1999-12-31")), + ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), + ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From b53df4a599fef6d25ecef43da0601f352505b48c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 13:08:57 -0400 Subject: [PATCH 045/135] Handle negative unspecified and negative unspecified + qualified Requires quite a few overrides of lower_ and upper_ range methods to properly handle dates due to padding working in the opposite direction for negative dates, esp when combined with month/day padding. 
--- edtf/parser/grammar.py | 2 +- edtf/parser/parser_classes.py | 226 +++++++++++++++++++++++++++++----- edtf/parser/tests.py | 3 + 3 files changed, 201 insertions(+), 30 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index ae03251..f458b2b 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -162,7 +162,7 @@ def f(toks): # (* *** unspecified *** *) yearWithOneOrTwoOrThreeUnspecifedDigits = Combine( - digit + (digit ^ "X") + (digit ^ "X") + "X" + Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X" )("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 0bbf855..43f4a9c 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -561,16 +561,13 @@ def __init__( int(significant_digits) if significant_digits else None ) self.ua = ua if ua else None + self.negative = self.year.startswith("-") def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + base = super().__str__() if self.ua: - r += str(self.ua) - return r + base += str(self.ua) + return base def _get_fuzzy_padding(self, lean): if not self.ua: @@ -579,24 +576,16 @@ def _get_fuzzy_padding(self, lean): padding = relativedelta() if self.year: - if self.precision == PRECISION_MILLENIUM: - padding += relativedelta( - years=int( - multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years - ) - ) - elif self.precision == PRECISION_CENTURY: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) - ) - elif self.precision == PRECISION_DECADE: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) - ) - else: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) - ) + year_no_symbol = self.year.lstrip("-") + years_padding = 
self._calculate_years_padding(multiplier, year_no_symbol) + # Reverse the padding for negative years and earliest calculations + # if self.negative: + # years_padding = -years_padding if lean == EARLIEST else years_padding + # else: + # years_padding = years_padding if lean == EARLIEST else -years_padding + + padding += years_padding + if self.month: padding += relativedelta( months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) @@ -608,6 +597,184 @@ def _get_fuzzy_padding(self, lean): return padding + def _calculate_years_padding(self, multiplier, year_no_symbol): + if self.precision == PRECISION_MILLENIUM: + return relativedelta( + years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years) + ) + elif self.precision == PRECISION_CENTURY: + return relativedelta( + years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) + ) + elif self.precision == PRECISION_DECADE: + return relativedelta( + years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) + ) + else: + return relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + + def lower_fuzzy(self): + time_empty_time_tuple = tuple(TIME_EMPTY_TIME) + time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) + strict_val = ( + self.lower_strict() + ) # negative handled in the lower_strict() override + + if self.negative: + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 1, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + else: + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + if ( + self.precision == 
PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 1, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ + 1 + ] + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, days_in_month) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + + return adjusted + + def upper_fuzzy(self): + time_empty_time_tuple = tuple(TIME_EMPTY_TIME) + time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) + strict_val = ( + self.upper_strict() + ) # negative handled in the upper_strict() override + + if self.negative: + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 12, 31) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ + 1 + ] + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, days_in_month) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + else: + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 12, 31) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + + return adjusted + + def lower_strict(self): + if 
self.negative: + strict_val = self._strict_date( + lean=LATEST + ) # gets the year right, but need to adjust day and month + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + return struct_time( + (strict_val.tm_year, 1, 1) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange( + strict_val.tm_year, strict_val.tm_mon + )[1] + return struct_time( + (strict_val.tm_year, strict_val.tm_mon, days_in_month) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + else: + return strict_val + else: + return self._strict_date(lean=EARLIEST) + + def upper_strict(self): + if self.negative: + strict_val = self._strict_date(lean=EARLIEST) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + return struct_time( + (strict_val.tm_year, 12, 31) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange( + strict_val.tm_year, strict_val.tm_mon + )[1] + return struct_time( + (strict_val.tm_year, strict_val.tm_mon, days_in_month) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + else: + return strict_val + else: + return self._strict_date(lean=LATEST) + @property def precision(self): if self.day: @@ -615,13 +782,14 @@ def precision(self): if self.month: return PRECISION_MONTH if self.year: - if self.year.isdigit(): + year_no_symbol = self.year.lstrip("-") + if year_no_symbol.isdigit(): return PRECISION_YEAR - if len(self.year) == 4 and self.year.endswith("XXX"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"): return PRECISION_MILLENIUM - if len(self.year) == 4 and self.year.endswith("XX"): + if len(year_no_symbol) == 4 and 
year_no_symbol.endswith("XX"): return PRECISION_CENTURY - if len(self.year) == 4 and self.year.endswith("X"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"): return PRECISION_DECADE raise ValueError(f"Unspecified date {self} has no precision") diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 464aca3..c89b3b8 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -81,6 +81,8 @@ ("1999-01-XX", ("1999-01-01", "1999-01-31")), # some day in 1999 ("1999-XX-XX", ("1999-01-01", "1999-12-31")), + # negative unspecified year + ("-01XX", ("-0199-01-01", "-0100-12-31")), # Uncertain/Approximate lower boundary dates (BCE) ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), @@ -92,6 +94,7 @@ ("1XXX", ("1000-01-01", "1999-12-31")), ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), + ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From c14a57b63846c5b94a00ae87c7ad16c37717ba6b Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 13:51:47 -0400 Subject: [PATCH 046/135] Cleanup --- edtf/parser/parser_classes.py | 171 ++++++---------------------------- edtf/parser/tests.py | 2 +- 2 files changed, 32 insertions(+), 141 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 43f4a9c..a15cbf1 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -550,17 +550,14 @@ def __init__( ua=None, **kwargs, ): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return - self.year = year # Year is 
required, but sometimes passed in as a 'date' dict. - self.month = month - self.day = day - self.significant_digits = ( - int(significant_digits) if significant_digits else None + super().__init__( + year=year, + month=month, + day=day, + significant_digits=significant_digits, + **kwargs, ) - self.ua = ua if ua else None + self.ua = ua self.negative = self.year.startswith("-") def __str__(self): @@ -576,16 +573,8 @@ def _get_fuzzy_padding(self, lean): padding = relativedelta() if self.year: - year_no_symbol = self.year.lstrip("-") - years_padding = self._calculate_years_padding(multiplier, year_no_symbol) - # Reverse the padding for negative years and earliest calculations - # if self.negative: - # years_padding = -years_padding if lean == EARLIEST else years_padding - # else: - # years_padding = years_padding if lean == EARLIEST else -years_padding - + years_padding = self._years_padding(multiplier) padding += years_padding - if self.month: padding += relativedelta( months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) @@ -594,127 +583,32 @@ def _get_fuzzy_padding(self, lean): padding += relativedelta( days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) ) - return padding - def _calculate_years_padding(self, multiplier, year_no_symbol): - if self.precision == PRECISION_MILLENIUM: - return relativedelta( - years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years) - ) - elif self.precision == PRECISION_CENTURY: - return relativedelta( - years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) - ) - elif self.precision == PRECISION_DECADE: - return relativedelta( - years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) - ) - else: - return relativedelta( - years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) - ) + def _years_padding(self, multiplier): + """Calculate year padding based on the precision.""" + precision_settings = { + PRECISION_MILLENIUM: 
appsettings.PADDING_MILLENNIUM_PRECISION.years, + PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION.years, + PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION.years, + PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION.years, + } + years = precision_settings.get(self.precision, 0) + return relativedelta(years=int(multiplier * years)) def lower_fuzzy(self): - time_empty_time_tuple = tuple(TIME_EMPTY_TIME) - time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) strict_val = ( self.lower_strict() ) # negative handled in the lower_strict() override - - if self.negative: - adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 1, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - else: - adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 1, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ - 1 - ] - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, days_in_month) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) return adjusted def upper_fuzzy(self): - time_empty_time_tuple = tuple(TIME_EMPTY_TIME) - time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) strict_val = ( self.upper_strict() ) # 
negative handled in the upper_strict() override - if self.negative: - adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 12, 31) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ - 1 - ] - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, days_in_month) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - else: - adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 12, 31) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) return adjusted def lower_strict(self): @@ -722,11 +616,11 @@ def lower_strict(self): strict_val = self._strict_date( lean=LATEST ) # gets the year right, but need to adjust day and month - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM + if self.precision in ( + PRECISION_YEAR, + PRECISION_DECADE, + PRECISION_CENTURY, + PRECISION_MILLENIUM, ): return struct_time( (strict_val.tm_year, 1, 1) @@ -734,11 +628,8 @@ def lower_strict(self): + tuple(TIME_EMPTY_EXTRAS) ) elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange( - strict_val.tm_year, 
strict_val.tm_mon - )[1] return struct_time( - (strict_val.tm_year, strict_val.tm_mon, days_in_month) + (strict_val.tm_year, strict_val.tm_mon, 1) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) ) @@ -750,11 +641,11 @@ def lower_strict(self): def upper_strict(self): if self.negative: strict_val = self._strict_date(lean=EARLIEST) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM + if self.precision in ( + PRECISION_YEAR, + PRECISION_DECADE, + PRECISION_CENTURY, + PRECISION_MILLENIUM, ): return struct_time( (strict_val.tm_year, 12, 31) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c89b3b8..199f245 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -94,7 +94,7 @@ ("1XXX", ("1000-01-01", "1999-12-31")), ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), - ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")), + ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From 53d3a32c9fe0b18fb7aa550de4478cc18550bc2f Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 15:10:46 -0400 Subject: [PATCH 047/135] Add a global debug setting If not in debug mode, use a simpler EDTFParseException rather than returning the full pyparsing error --- edtf/appsettings.py | 2 ++ edtf/parser/grammar.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index e00a223..8e15846 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -98,3 +98,5 @@ MULTIPLIER_IF_APPROXIMATE = 
EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) + +DEBUG_PYPARSING = False diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index f458b2b..1e624fc 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -4,6 +4,7 @@ # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips import pyparsing +from edtf.appsettings import DEBUG_PYPARSING pyparsing.ParserElement.enablePackrat() @@ -342,7 +343,9 @@ def f(toks): ) -def parse_edtf(str, parseAll=True, fail_silently=False): +def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): + if debug is None: + debug = DEBUG_PYPARSING try: if not str: raise ParseException("You must supply some input text") @@ -352,4 +355,8 @@ def parse_edtf(str, parseAll=True, fail_silently=False): except ParseException as err: if fail_silently: return None - raise EDTFParseException(err) from err + if debug: + raise + near_text = str[max(err.loc - 10, 0) : err.loc + 10] + full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." 
+ raise EDTFParseException(full_msg) from None From ab6c41320eb2354bbf68b78ec5d121a0709dd777 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 15:39:42 -0400 Subject: [PATCH 048/135] Handle empty string --- edtf/parser/grammar.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 1e624fc..773f806 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -343,13 +343,13 @@ def f(toks): ) -def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): +def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING try: - if not str: + if not input_string: raise ParseException("You must supply some input text") - p = edtfParser.parseString(str.strip(), parseAll) + p = edtfParser.parseString(input_string.strip(), parseAll) if p: return p[0] except ParseException as err: @@ -357,6 +357,8 @@ def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): return None if debug: raise - near_text = str[max(err.loc - 10, 0) : err.loc + 10] + near_text = "" + if input_string: + near_text = input_string[max(err.loc - 10, 0) : err.loc + 10] full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." 
raise EDTFParseException(full_msg) from None From 55b0723754b7eb606820b11ccc7bb04d5a6232b3 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:58:07 -0400 Subject: [PATCH 049/135] Add targeted failure and tests for empty and null inputs --- edtf/parser/grammar.py | 4 ++-- edtf/parser/tests.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 773f806..651b4b3 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -346,9 +346,9 @@ def f(toks): def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING + if not input_string: + raise EDTFParseException("You must supply some input text") try: - if not input_string: - raise ParseException("You must supply some input text") p = edtfParser.parseString(input_string.strip(), parseAll) if p: return p[0] diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 199f245..15875b9 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -347,6 +347,14 @@ def test_non_parsing(bad_input): parse(bad_input) +@pytest.mark.parametrize("bad_input", [None, ""]) +def test_empty_input(bad_input): + """Test that empty input raises a specific exception.""" + with pytest.raises(EDTFParseException) as exc_info: + parse(bad_input) + assert "You must supply some input text" in str(exc_info.value) + + def test_comparisons(): """Test comparisons between parsed EDTF objects and standard dates.""" d1 = parse("1979-08~") From d5ad27b37916ebe333642de1cc5b20ea5986465a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:00:32 -0400 Subject: [PATCH 050/135] Improve EDTFParseException handling Includes handling for empty or null input strings and null errs passed to the constructor Co-Authored-By: aweakley <224316+aweakley@users.noreply.github.com> --- 
edtf/fields.py | 12 ++++++++---- edtf/parser/edtf_exceptions.py | 26 +++++++++++++++++++++++++- edtf/parser/grammar.py | 8 ++------ 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index f717592..2f25c94 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -4,10 +4,12 @@ from django.db import models from django.db.models import signals from django.db.models.query_utils import DeferredAttribute +from pyparsing import ParseException from edtf import EDTFObject, parse_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd from edtf.natlang import text_to_edtf +from edtf.parser.edtf_exceptions import EDTFParseException DATE_ATTRS = ( "lower_strict", @@ -132,10 +134,12 @@ def update_values(self, instance, *args, **kwargs): if direct_input and ( existing_value is None or str(existing_value) != direct_input ): - edtf = parse_edtf( - direct_input, fail_silently=True - ) # ParseException if invalid; should this be raised? - # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) + try: + edtf = parse_edtf( + direct_input, fail_silently=True + ) # ParseException if invalid; should this be raised? + except ParseException as err: + raise EDTFParseException(direct_input, err) from None # set the natural_text (display) field to the direct_input if it is not provided if natural_text == "": diff --git a/edtf/parser/edtf_exceptions.py b/edtf/parser/edtf_exceptions.py index 9530602..d906d58 100644 --- a/edtf/parser/edtf_exceptions.py +++ b/edtf/parser/edtf_exceptions.py @@ -2,4 +2,28 @@ class EDTFParseException(ParseException): - pass + """Raised when an input cannot be parsed as an EDTF string. 
+ + Attributes: + input_string - the input string that could not be parsed + err -- the original ParseException that caused this one + """ + + def __init__(self, input_string, err=None): + if input_string is None: + input_string = "" + self.input_string = input_string + if err is None: + err = ParseException(input_string, 0, "Invalid input or format.") + self.err = err + super().__init__(str(err), err.loc if err.loc else 0, self.input_string) + + def __str__(self): + if not self.input_string: + return "You must supply some input text" + near_text = ( + self.input_string[max(self.err.loc - 10, 0) : self.err.loc + 10] + if hasattr(self.err, "loc") + else "" + ) + return f"Error at position {self.err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 651b4b3..beabf52 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -347,7 +347,7 @@ def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING if not input_string: - raise EDTFParseException("You must supply some input text") + raise EDTFParseException(input_string) try: p = edtfParser.parseString(input_string.strip(), parseAll) if p: @@ -357,8 +357,4 @@ def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): return None if debug: raise - near_text = "" - if input_string: - near_text = input_string[max(err.loc - 10, 0) : err.loc + 10] - full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." 
- raise EDTFParseException(full_msg) from None + raise EDTFParseException(input_string, err) from None From daf0d041dc739975e822f35813dfd82ca75eacea Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:40:15 -0400 Subject: [PATCH 051/135] Add the TestEvent model to Django admin Make the string representation of TestEvent simpler --- edtf_django_tests/edtf_integration/admin.py | 44 +++++++++++++++++++- edtf_django_tests/edtf_integration/models.py | 4 -- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py index 846f6b4..3051891 100644 --- a/edtf_django_tests/edtf_integration/admin.py +++ b/edtf_django_tests/edtf_integration/admin.py @@ -1 +1,43 @@ -# Register your models here. +from django.contrib import admin + +from .models import TestEvent + + +class TestEventAdmin(admin.ModelAdmin): + list_display = ( + "date_display", + "date_edtf_direct", + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + "date_edtf", + ) + search_fields = ("date_display", "date_edtf_direct") + list_filter = ("date_earliest", "date_latest") + readonly_fields = ( + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + "date_edtf", + ) + + fieldsets = ( + (None, {"fields": ("date_display", "date_edtf_direct", "date_edtf")}), + ( + "Computed Dates", + { + "classes": ("collapse",), + "fields": ( + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + ), + }, + ), + ) + + +admin.site.register(TestEvent, TestEventAdmin) diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py index 5120889..5e66592 100644 --- a/edtf_django_tests/edtf_integration/models.py +++ b/edtf_django_tests/edtf_integration/models.py @@ -49,9 +49,5 @@ def __str__(self) -> str: return ( f"Test Event: 
{self.date_display=}, " f"{self.date_edtf_direct=}, " - f"{self.date_earliest=}, " - f"{self.date_latest=}, " - f"{self.date_sort_ascending=}, " - f"{self.date_sort_descending=}, " f"{self.date_edtf=}" ) From 581855784dd428a51fd17ac71e92301030a48624 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 5 Jun 2024 13:48:07 -0400 Subject: [PATCH 052/135] Update qualification properties Properly set qualification properties on - UncertainOrApproximate - Unspecified - Level1Interval - PartialUncertainOrApproximate - Level2Interval Adds tests to check that each EDTF object is parsed and that .is_approximate, .is_uncertain, and .is_uncertain_and_approximate are set to what we expect them to be --- edtf/parser/parser_classes.py | 45 ++++++++++++++++++++++++++++++++++- edtf/parser/tests.py | 32 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index a15cbf1..b2dbadd 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -91,7 +91,7 @@ def apply_delta(op, time_struct, delta): class EDTFObject: """ - Object to attact to a parser to become instantiated when the parser + Object to attach to a parser to become instantiated when the parser completes. 
""" @@ -470,6 +470,11 @@ class UncertainOrApproximate(EDTFObject): def __init__(self, date, ua): self.date = date self.ua = ua + self.is_uncertain = ua.is_uncertain if ua else False + self.is_approximate = ua.is_approximate if ua else False + self.is_uncertain_and_approximate = ( + ua.is_uncertain_and_approximate if ua else False + ) def __str__(self): if self.ua: @@ -558,6 +563,11 @@ def __init__( **kwargs, ) self.ua = ua + self.is_uncertain = ua.is_uncertain if ua else False + self.is_approximate = ua.is_approximate if ua else False + self.is_uncertain_and_approximate = ( + ua.is_uncertain_and_approximate if ua else False + ) self.negative = self.year.startswith("-") def __str__(self): @@ -709,6 +719,12 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate + self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_uncertain_and_approximate = ( + self.lower.is_uncertain_and_approximate + or self.upper.is_uncertain_and_approximate + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -840,6 +856,27 @@ def __init__( self.all_ua = all_ua + uas = [ + year_ua, + month_ua, + day_ua, + year_month_ua, + month_day_ua, + season_ua, + all_ua, + ] + self.is_uncertain = any( + item.is_uncertain for item in uas if hasattr(item, "is_approximate") + ) + self.is_approximate = any( + item.is_approximate for item in uas if hasattr(item, "is_approximate") + ) + self.is_uncertain_and_approximate = any( + item.is_uncertain_and_approximate + for item in uas + if hasattr(item, "is_uncertain_and_approximate") + ) + def __str__(self): if self.season_ua: return f"{self.season}{self.season_ua}" @@ -1046,6 +1083,12 @@ def __init__(self, lower, upper): self.upper = upper[0] else: self.upper = upper + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate + self.is_uncertain = 
self.lower.is_uncertain or self.upper.is_uncertain + self.is_uncertain_and_approximate = ( + self.lower.is_uncertain_and_approximate + or self.upper.is_uncertain_and_approximate + ) class Level2Season(Season): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 15875b9..e7f2953 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -240,6 +240,24 @@ "2001-29", ) +APPROXIMATE_UNCERTAIN_EXAMPLES = ( + # first part of tuple is the input EDTF string, second part is a tuple of booleans: + # uncertain ?, approximate ~, both uncertain and approximate % + ("2004", (False, False, False)), + ("2006-06-11", (False, False, False)), + ("-0999", (False, False, False)), + ("1984?", (True, False, False)), + ("2004-06-11?", (True, False, False)), + ("1984~", (False, True, False)), + ("1984%", (False, False, True)), + ("1984~/2004-06", (False, True, False)), + ("2004-%06", (False, False, True)), + ("2004?-~06-~04", (True, True, False)), + ("2011-~06-~04", (False, True, False)), + ("2004-06-~01/2004-06-~20", (False, True, False)), + ("156X~", (False, True, False)), +) + BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, @@ -379,3 +397,17 @@ def test_comparisons(): def test_benchmark_parser(benchmark, test_input): """Benchmark parsing of selected EDTF strings.""" benchmark(parse, test_input) + + +@pytest.mark.parametrize("test_input,expected_tuple", APPROXIMATE_UNCERTAIN_EXAMPLES) +def test_approximate_uncertain(test_input, expected_tuple): + """Test parsing of EDTF strings and check .is_uncertain, .is_approximate, + and .is_uncertain_and_approximate properties. 
The expected_tuple should have three + values, the first should be a boolean indicating if the date is uncertain, + the second should be a boolean indicating if the date is approximate, and the + third should be a boolean indicating if the date is both uncertain and approximate.""" + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" + assert result.is_uncertain == expected_tuple[0] + assert result.is_approximate == expected_tuple[1] + assert result.is_uncertain_and_approximate == expected_tuple[2] From 63a15736d8d389bd2a8d29fb21990e7fb16f2569 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 5 Jun 2024 15:00:22 -0400 Subject: [PATCH 053/135] Add docs about qualification properties --- README.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/README.md b/README.md index 9fc6ede..b001157 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,50 @@ One can interpret uncertain or approximate dates as 'plus or minus a [level of p If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'. +### Qualification properties +EDTF objects support properties that provide an overview of how the object is qualified: +- `.is_uncertain (?)` +- `.is_approximate (~)` +- `.is_uncertain_and_approximate (%)` +These properties represent whether the any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and aproximate. 
+```python +>>> parse_edtf("2006-06-11") +Date: '2006-06-11' +>>> parse_edtf("2006-06-11").is_uncertain +False +>>> parse_edtf("2006-06-11").is_approximate +False + +>>> parse_edtf("1984?") +UncertainOrApproximate: '1984?' +>>> parse_edtf("1984?").is_approximate +False +>>> parse_edtf("1984?").is_uncertain +True +>>> parse_edtf("1984?").is_uncertain_and_approximate +False + +>>> parse_edtf("1984%").is_uncertain +False +>>> parse_edtf("1984%").is_uncertain_and_approximate +True + +>>> parse_edtf("1984~/2004-06") +Level1Interval: '1984~/2004-06' +>>> parse_edtf("1984~/2004-06").is_approximate +True +>>> parse_edtf("1984~/2004-06").is_uncertain +False + +>>> parse_edtf("2004?-~06-~04") +PartialUncertainOrApproximate: '2004?-~06-~04'>>> L2_PartialUncertainOrApproximate.is_approximate +True +>>> parse_edtf("2004?-~06-~04").is_uncertain +True +>>> parse_edtf("2004?-~06-~04").is_uncertain_and_approximate +False +``` + ### Seasons Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. 
From b3205afe2d634527fe2c6d5f83670f2c1e6c49ba Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 10 Jun 2024 22:57:59 -0400 Subject: [PATCH 054/135] Fix typo, add more tests --- README.md | 3 ++- edtf/parser/parser_classes.py | 2 +- edtf/parser/tests.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b001157..6acb176 100644 --- a/README.md +++ b/README.md @@ -378,7 +378,8 @@ True False >>> parse_edtf("2004?-~06-~04") -PartialUncertainOrApproximate: '2004?-~06-~04'>>> L2_PartialUncertainOrApproximate.is_approximate +PartialUncertainOrApproximate: '2004?-~06-~04' +>>> parse_edtf("2004?-~06-~04").is_approximate True >>> parse_edtf("2004?-~06-~04").is_uncertain True diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index b2dbadd..ed03355 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -866,7 +866,7 @@ def __init__( all_ua, ] self.is_uncertain = any( - item.is_uncertain for item in uas if hasattr(item, "is_approximate") + item.is_uncertain for item in uas if hasattr(item, "is_uncertain") ) self.is_approximate = any( item.is_approximate for item in uas if hasattr(item, "is_approximate") diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index e7f2953..c2dd711 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -253,6 +253,7 @@ ("1984~/2004-06", (False, True, False)), ("2004-%06", (False, False, True)), ("2004?-~06-~04", (True, True, False)), + ("2004?-06-04", (True, False, False)), ("2011-~06-~04", (False, True, False)), ("2004-06-~01/2004-06-~20", (False, True, False)), ("156X~", (False, True, False)), From 7a99f1203aa675aa37fb01b9a8af527c6c40dfd5 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:37:22 -0400 Subject: [PATCH 055/135] Simplify EDTFField init; add direct_input_field to deconstruct() --- edtf/fields.py | 25 
+++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index 2f25c94..642b6bb 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -48,21 +48,12 @@ def __init__( **kwargs, ): kwargs["max_length"] = 2000 - ( - self.natural_text_field, - self.direct_input_field, - self.lower_strict_field, - self.upper_strict_field, - self.lower_fuzzy_field, - self.upper_fuzzy_field, - ) = ( - natural_text_field, - direct_input_field, - lower_strict_field, - upper_strict_field, - lower_fuzzy_field, - upper_fuzzy_field, - ) + self.natural_text_field = natural_text_field + self.direct_input_field = direct_input_field + self.lower_strict_field = lower_strict_field + self.upper_strict_field = upper_strict_field + self.lower_fuzzy_field = lower_fuzzy_field + self.upper_fuzzy_field = upper_fuzzy_field super().__init__(verbose_name, name, **kwargs) description = ( @@ -74,6 +65,8 @@ def deconstruct(self): name, path, args, kwargs = super().deconstruct() if self.natural_text_field: kwargs["natural_text_field"] = self.natural_text_field + if self.direct_input_field: + kwargs["direct_input_field"] = self.direct_input_field for attr in DATE_ATTRS: field = f"{attr}_field" @@ -152,7 +145,7 @@ def update_values(self, instance, *args, **kwargs): ): edtf = parse_edtf( edtf_string, fail_silently=True - ) # potetial ParseException if invalid; should this be raised? + ) # potential ParseException if invalid; should this be raised? 
else: edtf = existing_value else: From e99813cbdcf878111c946c9b8b5f142e38bfb833 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:53:22 -0400 Subject: [PATCH 056/135] Only publish benchmark results on the upstream --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f97b3c..4645d13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name != 'pull_request' + if: github.event_name == 'pull_request' && github.repository == 'ixc/python-edtf' with: tool: 'pytest' auto-push: true @@ -112,6 +112,7 @@ jobs: summary-always: true - name: Comment on benchmark results without publishing + if: github.event_name != 'pull_request' || github.repository != 'ixc/python-edtf' uses: benchmark-action/github-action-benchmark@v1 with: tool: 'pytest' From 952949156289cb1da8b1a9af59f32687cbdada8e Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 13 Jun 2024 21:12:55 +1000 Subject: [PATCH 057/135] Anticipate None for date_display #62 --- edtf/fields.py | 2 +- edtf_django_tests/edtf_integration/tests.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/edtf/fields.py b/edtf/fields.py index 642b6bb..7dba5d4 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -135,7 +135,7 @@ def update_values(self, instance, *args, **kwargs): raise EDTFParseException(direct_input, err) from None # set the natural_text (display) field to the direct_input if it is not provided - if natural_text == "": + if not natural_text: setattr(instance, self.natural_text_field, direct_input) elif natural_text: diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index 88fdca8..493d0d2 100644 --- 
a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -74,6 +74,26 @@ def test_date_display(self): self.assertEqual(self.event3.date_display, "2019-11") self.assertEqual(self.event4.date_display, "Approximately August 2018") + def test_date_display_with_none_or_empty_string(self): + """ + Test that the date_display field is correctly populated when the + `natural_date` field is set to empty string (for example, if it + were used with `null=False` in the model definition) or set to + None (if it were used with `null=True`). + """ + event = TestEvent(date_display="") + event.date_edtf_direct = "2020-03-15/2020-04-15" + # Trigger the descriptor to update the date_display field + event.date_edtf = "" + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + + event = TestEvent(date_display=None) + # Verify date_display is set to None even though the field is `null=False` + self.assertIsNone(event.date_display) + event.date_edtf_direct = "2020-03-15/2020-04-15" + event.date_edtf = "" + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + def test_comparison(self): # test equality of the same dates self.assertEqual( From 48b232fb70f3c0981fc26cca2f5ff4c965c83168 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:34:54 +0200 Subject: [PATCH 058/135] style(readme): use project name for heading --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6acb176..074d2f1 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ -edtf -===== +# python-edtf An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. -See http://www.loc.gov/standards/datetime/ for the current draft specification. +See for the final draft specification. 
This project is based on python-edtf and was developed to include the newest specification From c0dce8ad8519a5129ec02231221ee54a89e88934 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:35:24 +0200 Subject: [PATCH 059/135] style(readme): unify code block style --- README.md | 549 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 305 insertions(+), 244 deletions(-) diff --git a/README.md b/README.md index 074d2f1..b5f5bbc 100644 --- a/README.md +++ b/README.md @@ -11,69 +11,87 @@ This project is based on python-edtf and was developed to include the newest spe ## To install - pip install edtf +```shell +pip install edtf +``` ## To use - >>> from edtf import parse_edtf - # Parse an EDTF string to an EDTFObject - >>> e = parse_edtf("1979-08~") # approx August 1979 - >>> e - UncertainOrApproximate: '1979-08~' - # normalised string representation (some different EDTF strings have identical meanings) - >>> unicode(e) - u'1979-08~' - - # Derive Python date objects - # lower and upper bounds that strictly adhere to the given range - >>> e.lower_strict()[:3], e.upper_strict()[:3] - ((1979, 8, 1), (1979, 8, 31)) - # lower and upper bounds that are padded if there's indicated uncertainty - >>> e.lower_fuzzy()[:3], e.upper_fuzzy()[:3] - ((1979, 7, 1), (1979, 9, 30)) - - # Date intervals - >>> interval = parse_edtf("1979-08~/..") - >>> interval - Level1Interval: '1979-08~/..' - # Intervals have lower and upper EDTF objects. - >>> interval.lower, interval.upper - (UncertainOrApproximate: '1979-08~', UnspecifiedIntervalSection: '..') - >>> interval.lower.lower_strict()[:3], interval.lower.upper_strict()[:3] - ((1979, 8, 1), (1979, 8, 31)) - >>> interval.upper.upper_strict() # '..' 
is interpreted to mean open interval and is returning -/+ math.inf - math.inf - - # Date collections - >>> coll = parse_edtf('{1667,1668, 1670..1672}') - >>> coll - MultipleDates: '{1667, 1668, 1670..1672}' - >>> coll.objects - (Date: '1667', Date: '1668', Consecutives: '1670..1672') +```python +>>> from edtf import parse_edtf + +# Parse an EDTF string to an EDTFObject +>>> +>>> e = parse_edtf("1979-08~") # approx August 1979 +>>> e +UncertainOrApproximate: '1979-08~' + +# normalised string representation (some different EDTF strings have identical meanings) +>>> +>>> unicode(e) +u'1979-08~' + +# Derive Python date objects + +# lower and upper bounds that strictly adhere to the given range +>>> +>>> e.lower_strict()[:3], e.upper_strict()[:3] +((1979, 8, 1), (1979, 8, 31)) + +# lower and upper bounds that are padded if there's indicated uncertainty +>>> +>>> e.lower_fuzzy()[:3], e.upper_fuzzy()[:3] +((1979, 7, 1), (1979, 9, 30)) + +# Date intervals +>>> +>>> interval = parse_edtf("1979-08~/..") +>>> interval +Level1Interval: '1979-08~/..' + +# Intervals have lower and upper EDTF objects +>>> +>>> interval.lower, interval.upper +(UncertainOrApproximate: '1979-08~', UnspecifiedIntervalSection: '..') +>>> interval.lower.lower_strict()[:3], interval.lower.upper_strict()[:3] +((1979, 8, 1), (1979, 8, 31)) +>>> interval.upper.upper_strict() # '..' is interpreted to mean open interval and is returning -/+ math.inf +math.inf + +# Date collections +>>> +>>> coll = parse_edtf('{1667,1668, 1670..1672}') +>>> coll +MultipleDates: '{1667, 1668, 1670..1672}' +>>> coll.objects +(Date: '1667', Date: '1668', Consecutives: '1670..1672') +``` The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_classes.EDTFObject` subclass, depending on the type of date that was parsed. 
These classes are: - # Level 0 - Date - DateAndTime - Interval - - # Level 1 - UncertainOrApproximate - Unspecified - Level1Interval - UnspecifiedIntervalSection - LongYear - Season - - # Level 2 - PartialUncertainOrApproximate - PartialUnspecified - OneOfASet - MultipleDates - Level2Interval - Level2Season - ExponentialYear +```text +# Level 0 +Date +DateAndTime +Interval + +# Level 1 +UncertainOrApproximate +Unspecified +Level1Interval +UnspecifiedIntervalSection +LongYear +Season + +# Level 2 +PartialUncertainOrApproximate +PartialUnspecified +OneOfASet +MultipleDates +Level2Interval +Level2Season +ExponentialYear +``` All of these implement `upper/lower_strict/fuzzy()` methods to derive `struct_time` objects, except of UnspecifiedIntervalSection, that can also return math.inf value @@ -91,177 +109,209 @@ Test coverage includes every example given in the spec table of features. * Date: - >>> parse_edtf('1979-08') # August 1979 - Date: '1979-08' +```python +>>> parse_edtf('1979-08') # August 1979 +Date: '1979-08' +``` * Date and Time: - >>> parse_edtf('2004-01-01T10:10:10+05:00') - DateAndTime: '2004-01-01T10:10:10+05:00' +```python +>>> parse_edtf('2004-01-01T10:10:10+05:00') +DateAndTime: '2004-01-01T10:10:10+05:00' +``` * Interval (start/end): - >>> parse_edtf('1979-08-28/1979-09-25') # From August 28 to September 25 1979 - Interval: '1979-08-28/1979-09-25' +```python +>>> parse_edtf('1979-08-28/1979-09-25') # From August 28 to September 25 1979 +Interval: '1979-08-28/1979-09-25' +``` ### Level 1 Extensions * Uncertain/Approximate dates: - >>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 - UncertainOrApproximate: '1979-08-28~' +```python +>>> parse_edtf('1979-08-28~') # Approximately August 28th 1979 +UncertainOrApproximate: '1979-08-28~' +``` * Unspecified dates: - >>> parse_edtf('1979-08-XX') # An unknown day in August 1979 - Unspecified: '1979-08-XX' - >>> parse_edtf('1979-XX') # Some month in 1979 - Unspecified: '1979-XX' +```python +>>> 
parse_edtf('1979-08-XX') # An unknown day in August 1979 +Unspecified: '1979-08-XX' +>>> parse_edtf('1979-XX') # Some month in 1979 +Unspecified: '1979-XX' +``` * Extended intervals: - >>> parse_edtf('1984-06-02?/2004-08-08~') - Level1Interval: '1984-06-02?/2004-08-08~' +```python +>>> parse_edtf('1984-06-02?/2004-08-08~') +Level1Interval: '1984-06-02?/2004-08-08~' +``` * Years exceeding four digits: - >>> parse_edtf('Y-12000') # 12000 years BCE - LongYear: 'Y-12000' +```python +>>> parse_edtf('Y-12000') # 12000 years BCE +LongYear: 'Y-12000' +``` * Season: - >>> parse_edtf('1979-22') # Summer 1979 - Season: '1979-22' +```python +>>> parse_edtf('1979-22') # Summer 1979 +Season: '1979-22' +``` ### Level 2 Extensions * Partial uncertain/approximate: - >>> parse_edtf('2004-06~-11') # year certain, month/day approximate. - PartialUncertainOrApproximate: '2004-06~-11' +```python +>>> parse_edtf('2004-06~-11') # year certain, month/day approximate. +PartialUncertainOrApproximate: '2004-06~-11' +``` * Partial unspecified: - >>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 - PartialUnspecified: '1979-XX-28' +```python +>>> parse_edtf('1979-XX-28') # The 28th day of an uncertain month in 1979 +PartialUnspecified: '1979-XX-28' +``` * One of a set: - >>> parse_edtf("[..1760-12-03,1762]") - OneOfASet: '[..1760-12-03, 1762]' +```python +>>> parse_edtf("[..1760-12-03,1762]") +OneOfASet: '[..1760-12-03, 1762]' +``` * Multiple dates: - >>> parse_edtf('{1667,1668, 1670..1672}') - MultipleDates: '{1667, 1668, 1670..1672}' +```python +>>> parse_edtf('{1667,1668, 1670..1672}') +MultipleDates: '{1667, 1668, 1670..1672}' +``` * Level 2 Extended intervals: - >>> parse_edtf('2004-06-~01/2004-06-~20') - Level2Interval: '2004-06-~01/2004-06-~20' +```python +>>> parse_edtf('2004-06-~01/2004-06-~20') +Level2Interval: '2004-06-~01/2004-06-~20' +``` * Year requiring more than 4 digits - exponential form: - >>> e = parse_edtf('Y-17E7') - ExponentialYear: 'Y-17E7' - >>> 
e.estimated() - -170000000 +```python +>>> e = parse_edtf('Y-17E7') +ExponentialYear: 'Y-17E7' +>>> e.estimated() +-170000000 +``` * Significant digits: - # '1950S2': some year between 1900 and 1999, estimated to be 1950 - >>> d = parse_edtf('1950S2') - Date: '1950S2' - >>> d.lower_fuzzy()[:3] - (1900, 1, 1) - >>> d.upper_fuzzy()[:3] - (1999, 12, 31) - # 'Y171010000S3': some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. - >>> l = parse_edtf('Y171010000S3') - LongYear: 'Y171010000S3' - >>> l.estimated() - 171010000 - >>> l.lower_fuzzy()[:3] - (171000000, 1, 1) - >>> l.upper_fuzzy()[:3] - (171999999, 12, 31) - # 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 - >>> e = parse_edtf('Y3388E2S3') - ExponentialYear: 'Y3388E2S3S3' - >>> e.estimated() - 338800 - >>> e.lower_fuzzy()[:3] - (338000, 1, 1) - >>> e.upper_fuzzy()[:3] - (338999, 12, 31) +```python +# '1950S2': some year between 1900 and 1999, estimated to be 1950 +>>> d = parse_edtf('1950S2') +Date: '1950S2' +>>> d.lower_fuzzy()[:3] +(1900, 1, 1) +>>> d.upper_fuzzy()[:3] +(1999, 12, 31) +# 'Y171010000S3': some year between 171000000 and 171999999 estimated to be 171010000, with 3 significant digits. 
+>>> l = parse_edtf('Y171010000S3') +LongYear: 'Y171010000S3' +>>> l.estimated() +171010000 +>>> l.lower_fuzzy()[:3] +(171000000, 1, 1) +>>> l.upper_fuzzy()[:3] +(171999999, 12, 31) +# 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 +>>> e = parse_edtf('Y3388E2S3') +ExponentialYear: 'Y3388E2S3S3' +>>> e.estimated() +338800 +>>> e.lower_fuzzy()[:3] +(338000, 1, 1) +>>> e.upper_fuzzy()[:3] +(338999, 12, 31) +``` ### Natural language representation - The library includes a basic English natural language parser (it's not yet smart enough to work with occasions such as 'Easter', or in other languages): - >>> from edtf import text_to_edtf - >>> text_to_edtf("circa August 1979") - '1979-08~' +```python +>>> from edtf import text_to_edtf +>>> text_to_edtf("circa August 1979") +'1979-08~' +``` Note that the result is a string, not an `ETDFObject`. The parser can parse strings such as: - 'January 12, 1940' => '1940-01-12' - '90' => '1990' #implied century - 'January 2008' => '2008-01' - 'the year 1800' => '1800' - '10/7/2008' => '2008-10-07' # in a full-specced date, assume US ordering - - # uncertain/approximate - '1860?' => '1860?' - '1862 (uncertain)' => '1862?' - 'circa Feb 1812' => '1812-02~' - 'c.1860' => '1860~' #with or without . - 'ca1860' => '1860~' - 'approx 1860' => '1860~' - 'ca. 1860s' => '186X~' - 'circa 1840s' => '184X~' - 'ca. 1860s?' => '186X?~' - 'c1800s?' 
=> '180X?~' # with uncertainty indicators, use the decade - - # unspecified parts - 'January 12' => 'XXXX-01-12' - 'January' => 'XXXX-01' - '7/2008' => '2008-07' - 'month in 1872' => '1872-XX' - 'day in January 1872' => '1872-01-XX' - 'day in 1872' => '1872-XX-XX' - - #seasons - 'Autumn 1872' => '1872-23' - 'Fall 1872' => '1872-23' - - # before/after - 'earlier than 1928' => '/1928' - 'later than 1928' => '1928/' - 'before January 1928' => '/1928-01' - 'after about the 1920s' => '192X~/' - - #centuries - '1st century' => '00XX' - '10c' => '09XX' - '19th century?' => '18XX?' - - # just showing off now... - 'a day in about Spring 1849?' => '1849-21-XX?~' - - # simple ranges, which aren't as accurate as they could be. The parser is - limited to only picking the first year range it finds. - '1851-1852' => '1851/1852' - '1851-1852; printed 1853-1854' => '1851/1852' - '1851-52' => '1851/1852' - '1856-ca. 1865' => '1856/1865~' - '1860s-1870s' => '186X/187X' - '1920s - early 1930s' => '192X/193X' - '1938, printed 1940s-1950s' => '1938' - +```text +'January 12, 1940' => '1940-01-12' +'90' => '1990' #implied century +'January 2008' => '2008-01' +'the year 1800' => '1800' +'10/7/2008' => '2008-10-07' # in a full-specced date, assume US ordering + +# uncertain/approximate +'1860?' => '1860?' +'1862 (uncertain)' => '1862?' +'circa Feb 1812' => '1812-02~' +'c.1860' => '1860~' #with or without . +'ca1860' => '1860~' +'approx 1860' => '1860~' +'ca. 1860s' => '186X~' +'circa 1840s' => '184X~' +'ca. 1860s?' => '186X?~' +'c1800s?' 
=> '180X?~' # with uncertainty indicators, use the decade + +# unspecified parts +'January 12' => 'XXXX-01-12' +'January' => 'XXXX-01' +'7/2008' => '2008-07' +'month in 1872' => '1872-XX' +'day in January 1872' => '1872-01-XX' +'day in 1872' => '1872-XX-XX' + +#seasons +'Autumn 1872' => '1872-23' +'Fall 1872' => '1872-23' + +# before/after +'earlier than 1928' => '/1928' +'later than 1928' => '1928/' +'before January 1928' => '/1928-01' +'after about the 1920s' => '192X~/' + +#centuries +'1st century' => '00XX' +'10c' => '09XX' +'19th century?' => '18XX?' + +# just showing off now... +'a day in about Spring 1849?' => '1849-21-XX?~' + +# simple ranges, which aren't as accurate as they could be. The parser is +limited to only picking the first year range it finds. +'1851-1852' => '1851/1852' +'1851-1852; printed 1853-1854' => '1851/1852' +'1851-52' => '1851/1852' +'1856-ca. 1865' => '1856/1865~' +'1860s-1870s' => '186X/187X' +'1920s - early 1930s' => '192X/193X' +'1938, printed 1940s-1950s' => '1938' +``` Generating natural text from an EDTF representation is a future goal. @@ -275,13 +325,10 @@ Generating natural text from an EDTF representation is a future goal. * If a natural language groups dates with a '/', it's interpreted as "or" rather than "and". The resulting EDTF text is a list bracketed by `[]` ("one of these dates") rather than `{}` (all of these dates). - ## Converting to and from Python dates - Since EDTF dates are often regions, and often imprecise, we need to use a few different Python dates, depending on the circumstance. Generally, Python dates are used for sorting and filtering, and are not displayed directly to users. - ### `struct_time` date representation Because Python's `datetime` module does not support dates out side the range 1 AD to 9999 AD we return dates as `time.struct_time` objects by default instead of the `datetime.date` or `datetime.datetime` objects you might expect. 
@@ -290,7 +337,8 @@ The `struct_time` representation is more difficult to work with, but can be sort If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf.struct_time_to_date` and `edtf.struct_time_to_datetime` functions. -NOTE: This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket https://github.com/ixc/python-edtf/issues/26. +[!NOTE] +This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket . ### `lower_strict` and `upper_strict` @@ -298,26 +346,27 @@ These dates indicate the earliest and latest dates that are __strictly__ in the In an ascending sort (most recent last), sort by `lower_strict` to get a natural sort order. In a descending sort (most recent first), sort by `upper_strict`: - >>> e = parse_edtf('1912-04~') +```python +>>> e = parse_edtf('1912-04~') - >>> e.lower_strict() # Returns struct_time - >>> time.struct_time(tm_year=1912, tm_mon=4, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=0, tm_isdst=-1) +>>> e.lower_strict() # Returns struct_time +>>> time.struct_time(tm_year=1912, tm_mon=4, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=0, tm_isdst=-1) - >>> e.lower_strict()[:3] # Show only interesting parts of struct_time - (1912, 4, 01) +>>> e.lower_strict()[:3] # Show only interesting parts of struct_time +(1912, 4, 01) - >>> from edtf import struct_time_to_date - >>> struct_time_to_date(e.lower_strict()) # Convert to date - datetime.date(1912, 4, 01) +>>> from edtf import struct_time_to_date +>>> struct_time_to_date(e.lower_strict()) # Convert to date +datetime.date(1912, 4, 01) - >>> e.upper_strict()[:3] - (1912, 4, 30) +>>> e.upper_strict()[:3] +(1912, 4, 30) - >>> struct_time_to_date(e.upper_strict()) - datetime.date(1912, 4, 30) +>>> 
struct_time_to_date(e.upper_strict()) +datetime.date(1912, 4, 30) +``` ### `lower_fuzzy` and `upper_fuzzy` ------------------------------------ These dates indicate the earliest and latest dates that are __possible__ in the date range, for a fairly arbitrary definition of 'possibly'. @@ -325,28 +374,34 @@ These values are useful for filtering results - i.e. testing which EDTF dates mi The fuzzy dates are derived from the strict dates, plus or minus a level of padding that depends on how precise the date specfication is. For the case of approximate or uncertain dates, we (arbitrarily) pad the ostensible range by 100% of the uncertain timescale, or by a 12 weeks in the case of seasons. That is, if a date is approximate at the month scale, it is padded by a month. If it is approximate at the year scale, it is padded by a year: - >>> e = parse_edtf('1912-04~') - >>> e.lower_fuzzy()[:3] # padding is 100% of a month - (1912, 3, 1) - >>> e.upper_fuzzy()[:3] - (1912, 5, 30) - - >>> e = parse_edtf('1912~') - >>> e.lower_fuzzy()[:3] # padding is 100% of a year - (1911, 1, 1) - >>> e.upper_fuzzy()[:3] - (1913, 12, 31) +```python +>>> e = parse_edtf('1912-04~') +>>> e.lower_fuzzy()[:3] # padding is 100% of a month +(1912, 3, 1) +>>> e.upper_fuzzy()[:3] +(1912, 5, 30) + +>>> e = parse_edtf('1912~') +>>> e.lower_fuzzy()[:3] # padding is 100% of a year +(1911, 1, 1) +>>> e.upper_fuzzy()[:3] +(1913, 12, 31) +``` One can interpret uncertain or approximate dates as 'plus or minus a [level of precision]'. If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'. 
### Qualification properties + EDTF objects support properties that provide an overview of how the object is qualified: -- `.is_uncertain (?)` -- `.is_approximate (~)` -- `.is_uncertain_and_approximate (%)` + +* `.is_uncertain (?)` +* `.is_approximate (~)` +* `.is_uncertain_and_approximate (%)` + These properties represent whether the any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and aproximate. + ```python >>> parse_edtf("2006-06-11") Date: '2006-06-11' @@ -388,11 +443,12 @@ False ### Seasons +[!IMPORTANT] Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. ### Comparisons -Two EDTF dates are considered equal if their unicode() representations are the same. An EDTF date is considered greater than another if its `lower_strict` value is later. +Two EDTF dates are considered equal if their `unicode()` representations are the same. An EDTF date is considered greater than another if its `lower_strict` value is later. ## Django ORM field @@ -402,55 +458,60 @@ To store a natural language value on your model, define another field, and set t When your model is saved, the `natural_text_field` value will be parsed to set the `date_edtf` value, and the underlying EDTF object will set the `_earliest` and `_latest` fields on the model to a float value representing the Julian Date. - -**WARNING**: The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. 
They should **not** be used for definitive storage or for display after roundtrip conversions. +[!WARNING] +The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. They should __not__ be used for definitive storage or for display after roundtrip conversions. Example usage: - from django.db import models - from edtf.fields import EDTFField - - class MyModel(models.Model): - date_display = models.CharField( - "Date of creation (display)", - blank=True, - max_length=255, - ) - date_edtf = EDTFField( - "Date of creation (EDTF)", - natural_text_field='date_display', - lower_fuzzy_field='date_earliest', - upper_fuzzy_field='date_latest', - lower_strict_field='date_sort_ascending', - upper_strict_field='date_sort_descending', - blank=True, - null=True, - ) - # use for filtering - date_earliest = models.FloatField(blank=True, null=True) - date_latest = models.FloatField(blank=True, null=True) - # use for sorting - date_sort_ascending = models.FloatField(blank=True, null=True) - date_sort_descending = models.FloatField(blank=True, null=True) - +```python +from django.db import models +from edtf.fields import EDTFField + +class MyModel(models.Model): + date_display = models.CharField( + "Date of creation (display)", + blank=True, + max_length=255, + ) + date_edtf = EDTFField( + "Date of creation (EDTF)", + natural_text_field='date_display', + lower_fuzzy_field='date_earliest', + upper_fuzzy_field='date_latest', + lower_strict_field='date_sort_ascending', + upper_strict_field='date_sort_descending', + blank=True, + null=True, + ) + # use for filtering + date_earliest = models.FloatField(blank=True, null=True) + date_latest = models.FloatField(blank=True, null=True) + # use for sorting + date_sort_ascending = models.FloatField(blank=True, null=True) + date_sort_descending = 
models.FloatField(blank=True, null=True) +``` Since the `EDTFField` and the `_earliest` and `_latest` field values are set automatically, you may want to make them readonly, or not visible in your model admin. ## To develop + ### Setup -- Clone the repository: `git clone https://github.com/ixc/python-edtf.git` -- Set up a virtual environment: `python3 -m venv venv` -- Install the dependencies: `pip install -r dev-requirements.txt` -- Install precommit hooks: `pre-commit install` + +* Clone the repository: `git clone https://github.com/ixc/python-edtf.git` +* Set up a virtual environment: `python3 -m venv venv` +* Install the dependencies: `pip install -r dev-requirements.txt` +* Install precommit hooks: `pre-commit install` ### Running tests -- From `python-edtf`, run the unit tests: `pytest` -- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here]( https://ixc.github.io/python-edtf/dev/bench/)) -- From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` -- To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` + +* From `python-edtf`, run the unit tests: `pytest` +* From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here]( https://ixc.github.io/python-edtf/dev/bench/)) +* From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` +* To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. 
Some steps may require a GitHub PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` ### Linting and formatting -- Check linting: `ruff check --output-format=github --config pyproject.toml` -- Check formatting: `ruff format --check --config pyproject.toml` -- Fix formatting: `ruff format --config pyproject.toml` -- Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. + +* Check linting: `ruff check --output-format=github --config pyproject.toml` +* Check formatting: `ruff format --check --config pyproject.toml` +* Fix formatting: `ruff format --config pyproject.toml` +* Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. From e90db901d2bbd61260d93ec9afb33a30ca0bd432 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:37:44 +0200 Subject: [PATCH 060/135] update authors --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 56978fb..ef2c639 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ authors = [ { name = "Mark Finger" }, { name = "Sabine Müller" }, { name = "Cole Crawford" } + { name = "Klaus Rettinghaus" } ] maintainers = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au" } From 336e8bfb6463015fa2333d1a424134e39eb7de84 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 16 Jun 2024 16:49:39 +0200 Subject: [PATCH 061/135] style(readme): add relative link to file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b5f5bbc..4ef2a69 100644 --- a/README.md +++ b/README.md @@ -444,7 +444,7 @@ False ### Seasons [!IMPORTANT] -Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. +Seasons are interpreted as Northern Hemisphere by default. 
To change this, override the month mapping in [`appsettings.py`](edtf/appsettings.py). ### Comparisons From ea74dafb4298697ca01cd128db7f4b34ba8e5c3c Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 17 Jun 2024 09:52:17 +1000 Subject: [PATCH 062/135] Update contributors --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ef2c639..f533477 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,10 +12,11 @@ readme = {file = "README.txt", content-type = "text/markdown"} authors = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au"}, { name = "Alastair Weakley"}, + { name = "Greg Turner"}, { name = "James Murty"}, { name = "Mark Finger" }, { name = "Sabine Müller" }, - { name = "Cole Crawford" } + { name = "Cole Crawford" }, { name = "Klaus Rettinghaus" } ] maintainers = [ From 5f09bdf7dea2739f00f1c4de6a995fc61edb5966 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Mon, 17 Jun 2024 08:59:06 +0200 Subject: [PATCH 063/135] fix alerts --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4ef2a69..2aaef05 100644 --- a/README.md +++ b/README.md @@ -337,8 +337,8 @@ The `struct_time` representation is more difficult to work with, but can be sort If you are sure you are working with dates within the range supported by Python's `datetime` module, you can get these more convenient objects using the `edtf.struct_time_to_date` and `edtf.struct_time_to_datetime` functions. -[!NOTE] -This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket . +> [!NOTE] +> This library previously did return `date` and `datetime` objects from methods by default before we switched to `struct_time`. See ticket . 
### `lower_strict` and `upper_strict` @@ -443,8 +443,8 @@ False ### Seasons -[!IMPORTANT] -Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in [`appsettings.py`](edtf/appsettings.py). +> [!IMPORTANT] +> Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in [`appsettings.py`](edtf/appsettings.py). ### Comparisons @@ -458,8 +458,8 @@ To store a natural language value on your model, define another field, and set t When your model is saved, the `natural_text_field` value will be parsed to set the `date_edtf` value, and the underlying EDTF object will set the `_earliest` and `_latest` fields on the model to a float value representing the Julian Date. -[!WARNING] -The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. They should __not__ be used for definitive storage or for display after roundtrip conversions. +> [!WARNING] +> The conversion to and from Julian Date numerical values can be inaccurate, especially for ancient dates back to thousands of years BC. Ideally Julian Date values should be used for range and ordering operations only where complete accuracy is not required. They should __not__ be used for definitive storage or for display after roundtrip conversions. 
Example usage: From 4592e9ba50e68a6fbd87939a8f0da5c711ddccb4 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Mon, 17 Jun 2024 10:56:46 +0200 Subject: [PATCH 064/135] drop six dependency --- edtf/natlang/en.py | 5 ++--- pyproject.toml | 1 - requirements.txt | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index f6eef54..f28e685 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -4,7 +4,6 @@ from datetime import datetime from dateutil.parser import ParserError, parse -from six.moves import xrange from edtf import appsettings @@ -216,7 +215,7 @@ def text_to_edtf_date(text): mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) - for i in xrange(len(date1)): + for i in range(len(date1)): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. @@ -238,7 +237,7 @@ def text_to_edtf_date(text): # strip off unknown chars from end of string - except the first 4 - for i in reversed(xrange(len(result))): + for i in reversed(range(len(result))): if result[i] not in ("X", "-"): smallest_length = 4 diff --git a/pyproject.toml b/pyproject.toml index f533477..860741e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,6 @@ version = "5.0.0" dependencies = [ "python-dateutil", "pyparsing", - "six" ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" requires-python = ">=3.8" diff --git a/requirements.txt b/requirements.txt index 0ab3a7d..1656e27 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ python-dateutil pyparsing -six From 2dee3d08e41e02fc1c1f6d9c777f7b1800c7457c Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 27 Jun 2024 16:52:22 +1000 Subject: [PATCH 065/135] WIP adding checks for edtf field aliases #62 --- edtf/fields.py | 42 +++++++++++++++++++++ 
edtf_django_tests/edtf_integration/tests.py | 35 +++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/edtf/fields.py b/edtf/fields.py index 7dba5d4..9cf6b27 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -1,5 +1,6 @@ import pickle +from django.core import checks from django.core.exceptions import FieldDoesNotExist from django.db import models from django.db.models import signals @@ -188,3 +189,44 @@ def contribute_to_class(self, cls, name, **kwargs): # Only run post-initialization values update on non-abstract models if not cls._meta.abstract: signals.post_init.connect(self.update_values, sender=cls) + + def check(self, **kwargs): + errors = super().check(**kwargs) + + for field_alias in [ + "direct_input_field", + "lower_fuzzy_field", + "lower_strict_field", + "natural_text_field", + "upper_fuzzy_field", + "upper_strict_field", + ]: + errors.extend(self._check_field(field_alias)) + + return errors + + def _check_field(self, field_alias): + field_name = getattr(self, field_alias, None) + + # Check if the alias value has been provided in the field definition + if not field_name: + return [ + checks.Error( + f"You must specify a '{field_alias}' for EDTFField", + hint=None, + obj=self, + ) + ] + + # Check if the field that is referenced actually exists + try: + self.model._meta.get_field(field_name) + except FieldDoesNotExist: + return [ + checks.Error( + f"'{self.name}' refers to a non-existent '{field_alias}' field: '{field_name}'", + hint=None, + obj=self, + ) + ] + return [] diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index 493d0d2..da5bb83 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -122,3 +122,38 @@ def test_comparison(self): self.event2.date_edtf, "2019-11 is less than 2021-05-06", ) + + def test_field_related_field_specification(self): + edtf_field_on_model = TestEvent._meta.get_field("date_edtf") + required_fields = ( + 
"direct_input_field", + "lower_fuzzy_field", + "lower_strict_field", + "natural_text_field", + "upper_fuzzy_field", + "upper_strict_field", + ) + for field_alias in required_fields: + # Remove the alias from the edtf_field + orig_value = getattr(edtf_field_on_model, field_alias) + setattr(edtf_field_on_model, field_alias, None) + errors = edtf_field_on_model.check() + self.assertEqual(len(errors), 1) + self.assertTrue(field_alias in errors[0].msg) + # Replace the field so later tests can still work + setattr(edtf_field_on_model, field_alias, orig_value) + + # TODO: this is not working yet + # # Remove the field from the model + # referenced_field_name = getattr(edtf_field_on_model, field_alias) + # orig_fields = TestEvent._meta.local_fields + # TestEvent._meta.local_fields = [ # type: ignore + # field + # for field in TestEvent._meta.local_fields + # if field.name != referenced_field_name + # ] + # errors = TestEvent._meta.get_field("date_edtf").check() + # self.assertEqual(len(errors), 1) + # self.assertTrue(referenced_field_name in errors[0].msg) + # # Replace the field so later tests can still work + # TestEvent._meta.local_fields = orig_fields From 1a5ebd53e0f78c17f4e037569a089a082cf6b8fb Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 27 Jun 2024 21:35:53 +1000 Subject: [PATCH 066/135] Tests for aliases that point to non-existent fields #62 --- edtf/fields.py | 2 ++ edtf_django_tests/edtf_integration/tests.py | 28 +++++++++------------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index 9cf6b27..07a9744 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -215,6 +215,7 @@ def _check_field(self, field_alias): f"You must specify a '{field_alias}' for EDTFField", hint=None, obj=self, + id="python-edtf.EDTF01", ) ] @@ -227,6 +228,7 @@ def _check_field(self, field_alias): f"'{self.name}' refers to a non-existent '{field_alias}' field: '{field_name}'", hint=None, obj=self, + id="python-edtf.EDTF02", ) ] 
return [] diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index da5bb83..aa1bf34 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -140,20 +140,16 @@ def test_field_related_field_specification(self): errors = edtf_field_on_model.check() self.assertEqual(len(errors), 1) self.assertTrue(field_alias in errors[0].msg) - # Replace the field so later tests can still work - setattr(edtf_field_on_model, field_alias, orig_value) + # Should be an 'alias not specified' error + self.assertEqual(errors[0].id, "python-edtf.EDTF01") + + # Point the alias to a non-existent field + setattr(edtf_field_on_model, field_alias, "fake") + errors = edtf_field_on_model.check() + self.assertEqual(len(errors), 1) + self.assertTrue(field_alias in errors[0].msg) + # Should be a 'non-eixstent field' error + self.assertEqual(errors[0].id, "python-edtf.EDTF02") - # TODO: this is not working yet - # # Remove the field from the model - # referenced_field_name = getattr(edtf_field_on_model, field_alias) - # orig_fields = TestEvent._meta.local_fields - # TestEvent._meta.local_fields = [ # type: ignore - # field - # for field in TestEvent._meta.local_fields - # if field.name != referenced_field_name - # ] - # errors = TestEvent._meta.get_field("date_edtf").check() - # self.assertEqual(len(errors), 1) - # self.assertTrue(referenced_field_name in errors[0].msg) - # # Replace the field so later tests can still work - # TestEvent._meta.local_fields = orig_fields + # Repair the field so later tests can still work + setattr(edtf_field_on_model, field_alias, orig_value) From d313a9d2ed57e5f88871da51f1b48ea875f244f8 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Sun, 7 Jul 2024 17:19:23 +1000 Subject: [PATCH 067/135] Note on benchmarks location --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2aaef05..7542c36 100644 --- a/README.md +++ 
b/README.md @@ -515,3 +515,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut * Check formatting: `ruff format --check --config pyproject.toml` * Fix formatting: `ruff format --config pyproject.toml` * Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. + +### Coverage and benchmraks + +Coverage reports are generated and added as comments to commits, and also visible in the actions log. Benchmarks are run on pull requests and are published [here]( https://ixc.github.io/python-edtf/dev/bench/) and also visible in the actions log. From 2112db720a6e0f0d06df6bdac512d214c6e25de7 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 18 Jul 2024 17:44:38 +1000 Subject: [PATCH 068/135] Correct README file name --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 860741e..b48c3f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ dependencies = [ ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" requires-python = ">=3.8" -readme = {file = "README.txt", content-type = "text/markdown"} +readme = {file = "README.md", content-type = "text/markdown"} authors = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au"}, { name = "Alastair Weakley"}, From 6e508d016e9bbcc49b90d3c88ca3512d69a0d193 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 23 Jul 2024 17:03:14 +0200 Subject: [PATCH 069/135] Optimized regexes --- edtf/natlang/en.py | 126 ++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 5263e07..4f68f21 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,9 +1,10 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" from datetime import datetime +from typing import 
Optional + from dateutil.parser import parse import re from edtf import appsettings -from six.moves import xrange # two dates where every digit of an ISO date representation is different, @@ -12,24 +13,43 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r'(-?)([\du])([\dxu])([\dxu])([\dxu])' -LONG_YEAR_RE = r'y(-?)([1-9]\d\d\d\d+)' -CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?' -CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)' +SHORT_YEAR_RE = re.compile(r'(-?)([\du])([\dxu])([\dxu])([\dxu])') +LONG_YEAR_RE = re.compile(r'y(-?)([1-9]\d\d\d\d+)') +CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?') +CENTURY_RANGE = re.compile(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]') +CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)') +ONE_DIGIT_PARTIAL_FIRST = re.compile(r'\d\D\b') +TWO_DIGIT_PARTIAL_FIRST = re.compile(r'\d\d\b') +PARTIAL_CHECK = re.compile(r'\b\d\d\d\d$') +SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)") +BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b") +AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b") +APPROX_CHECK = re.compile(r'\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)') +UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)") +UNCERTAIN_REPL = re.compile(r'(\d{4})\?') +MIGHT_BE_CENTURY = re.compile(r'(\d{2}00)s') +MIGHT_BE_DECADE = re.compile(r'(\d{3}0)s') + +APPROX_CENTURY_RE = re.compile(r'\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?') +UNCERTAIN_CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?') + +APPROX_CE_RE = re.compile(r'\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)') +UNCERTAIN_CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)\?') + # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. 
REJECT_RULES = ( - r'.*dynasty.*', # Don't parse '23rd Dynasty' to 'uuuu-uu-23' + re.compile(r'.*dynasty.*'), # Don't parse '23rd Dynasty' to 'uuuu-uu-23' ) -def text_to_edtf(text): +def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. """ if not text: - return + return None t = text.lower() @@ -51,18 +71,18 @@ def text_to_edtf(text): # match looks from the beginning of the string, search # looks anywhere. - if re.match(r'\d\D\b', d2): # 1-digit year partial e.g. 1868-9 - if re.search(r'\b\d\d\d\d$', d1): # TODO: evaluate it and see if it's a year + if re.match(ONE_DIGIT_PARTIAL_FIRST, d2): # 1-digit year partial e.g. 1868-9 + if re.search(PARTIAL_CHECK, d1): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(r'\d\d\b', d2): # 2-digit year partial e.g. 1809-10 - if re.search(r'\b\d\d\d\d$', d1): + elif re.match(TWO_DIGIT_PARTIAL_FIRST, d2): # 2-digit year partial e.g. 1809-10 + if re.search(PARTIAL_CHECK, d1): d2 = d1[-4:-2] + d2 else: - century_range_match = re.search(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]', "%s-%s" % (d1,d2)) + century_range_match = re.search(CENTURY_RANGE, f"{d1}-{d2}") if century_range_match: g = century_range_match.groups() - d1 = "%sC" % g[0] - d2 = "%sC" % g[2] + d1 = f"{g[0]}C" + d2 = f"{g[2]}C" r1 = text_to_edtf_date(d1) r2 = text_to_edtf_date(d2) @@ -77,9 +97,9 @@ def text_to_edtf(text): # This whole section could be more friendly. 
else: - int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item) + int_match = re.search(SLASH_YEAR, list_item) if int_match: - return "[%s, %s]" % (int_match.group(1), int_match.group(2)) + return f"[{int_match.group(1)}, {int_match.group(2)}]" result = text_to_edtf_date(list_item) if result: @@ -87,23 +107,18 @@ def text_to_edtf(text): if result: break - is_before = re.findall(r'\bbefore\b', t) - is_before = is_before or re.findall(r'\bearlier\b', t) - is_before = is_before or re.findall(r'\baprés\b', t) - - is_after = re.findall(r'\bafter\b', t) - is_after = is_after or re.findall(r'\bsince\b', t) - is_after = is_after or re.findall(r'\blater\b', t) + is_before = re.findall(BEFORE_CHECK, t) + is_after = re.findall(AFTER_CHECK, t) if is_before: - result = u"unknown/%s" % result + result = f"unknown/{result}" elif is_after: - result = u"%s/unknown" % result + result = f"{result}/unknown" return result -def text_to_edtf_date(text): +def text_to_edtf_date(text) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. @@ -112,39 +127,29 @@ def text_to_edtf_date(text): differ are undefined. """ if not text: - return + return None t = text.lower() result = '' for reject_re in REJECT_RULES: if re.match(reject_re, t): - return + return None # matches on '1800s'. Needs to happen before is_decade. - could_be_century = re.findall(r'(\d{2}00)s', t) + could_be_century: list = re.findall(MIGHT_BE_CENTURY, t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" - t, is_decade = re.subn(r'(\d{3}0)s', r'\1', t) + t, is_decade = re.subn(MIGHT_BE_DECADE, r'\1', t) # detect approximation signifiers # a few 'circa' abbreviations just before the year - is_approximate = re.findall(r'\b(ca?\.?) 
?\d{4}', t) + is_approximate = re.findall(APPROX_CHECK, t) # the word 'circa' anywhere - is_approximate = is_approximate or re.findall(r'\bcirca\b', t) - # the word 'approx'/'around'/'about' anywhere - is_approximate = is_approximate or \ - re.findall(r'\b(approx|approximately|around|about)', t) - # a ~ before a year-ish number - is_approximate = is_approximate or re.findall(r'\b~\d{4}', t) - # a ~ at the beginning - is_approximate = is_approximate or re.findall(r'^~', t) # detect uncertainty signifiers - t, is_uncertain = re.subn(r'(\d{4})\?', r'\1', t) - # the words uncertain/maybe/guess anywhere - is_uncertain = is_uncertain or re.findall( - r'\b(uncertain|possibly|maybe|guess)', t) + t, is_uncertain = re.subn(UNCERTAIN_REPL, r'\1', t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t) # detect century forms is_century = re.findall(CENTURY_RE, t) @@ -153,27 +158,23 @@ def text_to_edtf_date(text): is_ce = re.findall(CE_RE, t) if is_century: result = "%02dxx" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' + CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t) + is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t) try: - is_bc = is_century[0][-1] in ("bc", "bce") - if is_bc: - result = "-%s" % result + if is_century[0][-1] in ("bc", "bce"): + result = f"-{result}" except IndexError: pass elif is_ce: result = "%04d" % (int(is_ce[0][0])) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' 
+ CE_RE, t) - is_uncertain = is_uncertain or re.findall(CE_RE + r'\?', t) + is_approximate = is_approximate or re.findall(APPROX_CE_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CE_RE, t) try: - is_bc = is_ce[0][-1] in ("bc", "bce") - if is_bc: - result = "-%s" % result + if is_ce[0][-1] in ("bc", "bce"): + result = f"-{result}" except IndexError: pass @@ -200,12 +201,12 @@ def text_to_edtf_date(text): ) except ValueError: - return + return None if dt1.date() == DEFAULT_DATE_1.date() and \ dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. - return + return None date1 = dt1.isoformat()[:10] date2 = dt2.isoformat()[:10] @@ -215,14 +216,13 @@ def text_to_edtf_date(text): mentions_month = re.findall(r'\bmonth\b.+(in|during)\b', t) mentions_day = re.findall(r'\bday\b.+(in|during)\b', t) - for i in xrange(len(date1)): + for i in range(len(date1)): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. 
- if i == 2 and could_be_century and \ - not (is_approximate or is_uncertain): + if i == 2 and could_be_century and not (is_approximate or is_uncertain): result += 'x' - elif i == 3 and is_decade > 0: + elif i == 3 and is_decade: if mentions_year: result += 'u' # year precision else: @@ -238,7 +238,7 @@ def text_to_edtf_date(text): # strip off unknown chars from end of string - except the first 4 - for i in reversed(xrange(len(result))): + for i in reversed(range(len(result))): if result[i] not in ('u', 'x', '-'): smallest_length = 4 From f2252f03c23b1f7a6a153ccf750e97a94ce71dd2 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 23 Jul 2024 17:18:26 +0200 Subject: [PATCH 070/135] Package updates --- edtf/convert.py | 8 +- edtf/jdutil.py | 32 +++---- edtf/natlang/en.py | 11 ++- edtf/natlang/tests.py | 4 +- edtf/parser/grammar.py | 14 +-- edtf/parser/parser_classes.py | 159 +++++++++++++++++----------------- edtf/parser/tests.py | 66 +++++++------- 7 files changed, 152 insertions(+), 142 deletions(-) diff --git a/edtf/convert.py b/edtf/convert.py index c1bfd3a..de1f2a2 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -59,8 +59,7 @@ def trim_struct_time(st, strip_time=False): """ if strip_time: return struct_time(list(st[:3]) + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) + return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) def struct_time_to_jd(st): @@ -106,7 +105,7 @@ def jd_to_struct_time(jd): ) -def _roll_negative_time_fields(year, month, day, hour, minute, second): +def _roll_negative_time_fields(year, month, day, hour, minute, second) -> tuple: """ Fix date/time fields which have nonsense negative values for any field except for year by rolling the overall date/time value backwards, treating @@ -142,4 +141,5 @@ def _roll_negative_time_fields(year, month, day, hour, minute, second): year += int(month / 12.0) # Adjust by whole year in months year -= 1 # Subtract 1 for negative minutes month %= 
12 # Convert negative month to positive remainder - return (year, month, day, hour, minute, second) + + return year, month, day, hour, minute, second diff --git a/edtf/jdutil.py b/edtf/jdutil.py index 9fabdd1..4a12b58 100644 --- a/edtf/jdutil.py +++ b/edtf/jdutil.py @@ -17,7 +17,8 @@ # 10-14-1582 never occurred. Python datetime objects will produce incorrect # time deltas if one date is from before 10-15-1582. -def mjd_to_jd(mjd): + +def mjd_to_jd(mjd: float) -> float: """ Convert Modified Julian Day to Julian Day. @@ -30,13 +31,11 @@ def mjd_to_jd(mjd): ------- jd : float Julian Day - - """ return mjd + 2400000.5 -def jd_to_mjd(jd): +def jd_to_mjd(jd: float) -> float: """ Convert Julian Day to Modified Julian Day @@ -54,7 +53,7 @@ def jd_to_mjd(jd): return jd - 2400000.5 -def date_to_jd(year,month,day): +def date_to_jd(year: int, month: int, day: float) -> float: """ Convert a date to Julian Day. @@ -117,7 +116,7 @@ def date_to_jd(year,month,day): return jd -def jd_to_date(jd): +def jd_to_date(jd: float) -> (int, int, float): """ Convert Julian Day to date. @@ -184,7 +183,10 @@ def jd_to_date(jd): return year, month, day -def hmsm_to_days(hour=0,min=0,sec=0,micro=0): +def hmsm_to_days(hour: int = 0, + min: int = 0, + sec: int = 0, + micro: int = 0) -> float: """ Convert hours, minutes, seconds, and microseconds to fractional days. @@ -222,7 +224,7 @@ def hmsm_to_days(hour=0,min=0,sec=0,micro=0): return days / 24. -def days_to_hmsm(days): +def days_to_hmsm(days: float) -> (int, int, int, int): """ Convert fractional days to hours, minutes, seconds, and microseconds. Precision beyond microseconds is rounded to the nearest microsecond. @@ -271,7 +273,7 @@ def days_to_hmsm(days): return int(hour), int(min), int(sec), int(micro) -def datetime_to_jd(date): +def datetime_to_jd(date: dt.datetime) -> float: """ Convert a `datetime.datetime` object to Julian Day. 
@@ -298,7 +300,7 @@ def datetime_to_jd(date): return date_to_jd(date.year,date.month,days) -def jd_to_datetime(jd): +def jd_to_datetime(jd: float) -> dt.datetime: """ Convert a Julian Day to an `jdutil.datetime` object. @@ -328,7 +330,7 @@ def jd_to_datetime(jd): return datetime(year,month,day,hour,min,sec,micro) -def timedelta_to_days(td): +def timedelta_to_days(td: dt.timedelta) -> float: """ Convert a `datetime.timedelta` object to a total number of days. @@ -372,7 +374,7 @@ class datetime(dt.datetime): datetime.datetime : Parent class. """ - def __add__(self,other): + def __add__(self, other): if not isinstance(other,dt.timedelta): s = "jdutil.datetime supports '+' only with datetime.timedelta" raise TypeError(s) @@ -383,7 +385,7 @@ def __add__(self,other): return jd_to_datetime(combined) - def __radd__(self,other): + def __radd__(self, other): if not isinstance(other,dt.timedelta): s = "jdutil.datetime supports '+' only with datetime.timedelta" raise TypeError(s) @@ -394,7 +396,7 @@ def __radd__(self,other): return jd_to_datetime(combined) - def __sub__(self,other): + def __sub__(self, other): if isinstance(other,dt.timedelta): days = timedelta_to_days(other) @@ -412,7 +414,7 @@ def __sub__(self,other): s += "datetime.timedelta, jdutil.datetime and datetime.datetime" raise TypeError(s) - def __rsub__(self,other): + def __rsub__(self, other): if not isinstance(other, (datetime,dt.datetime)): s = "jdutil.datetime supports '-' with: " s += "jdutil.datetime and datetime.datetime" diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 4f68f21..8cb72c4 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -36,6 +36,11 @@ APPROX_CE_RE = re.compile(r'\b(ca?\.?) 
?(\d{1,4}) (ad|ce|bc|bce)') UNCERTAIN_CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)\?') +MENTIONS_YEAR = re.compile(r'\byear\b.+(in|during)\b') +MENTIONS_MONTH = re.compile(r'\bmonth\b.+(in|during)\b') +MENTIONS_DAY = re.compile(r'\bday\b.+(in|during)\b') + + # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. @@ -212,9 +217,9 @@ def text_to_edtf_date(text) -> Optional[str]: date2 = dt2.isoformat()[:10] # guess precision of 'unspecified' characters to use - mentions_year = re.findall(r'\byear\b.+(in|during)\b', t) - mentions_month = re.findall(r'\bmonth\b.+(in|during)\b', t) - mentions_day = re.findall(r'\bday\b.+(in|during)\b', t) + mentions_year = re.findall(MENTIONS_YEAR, t) + mentions_month = re.findall(MENTIONS_MONTH, t) + mentions_day = re.findall(MENTIONS_DAY, t) for i in range(len(date1)): # if the given year could be a century (e.g. '1800s') then use diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index ea137d2..d18ec76 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -207,8 +207,8 @@ def test_natlang(self): """ for i, o in EXAMPLES: e = text_to_edtf(i) - print("%s => %s" % (i, e)) - self.assertEqual(e, o) + print(f"{i} => {e}") + self.assertEqual(e, o, msg=f"Testing {i}") if __name__ == '__main__': diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index d612c5f..14cb3a4 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -282,14 +282,16 @@ def f(toks): edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") -def parse_edtf(str, parseAll=True, fail_silently=False): +def parse_edtf(inp: str, parse_all: bool = True, fail_silently: bool = False): + if not inp: + raise ParseException("You must supply some input text") + try: - if not str: - raise ParseException("You must supply some input text") - p = edtfParser.parseString(str.strip(), parseAll) - if p: - return p[0] + p = edtfParser.parseString(inp.strip(), 
parse_all) except ParseException as e: if fail_silently: return None raise EDTFParseException(e) + + if p: + return p[0] diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index b670296..ae7adb4 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -3,6 +3,7 @@ from time import struct_time from datetime import date, datetime from operator import add, sub +from typing import Optional from dateutil.relativedelta import relativedelta @@ -22,7 +23,7 @@ PRECISION_DAY = "day" -def days_in_month(year, month): +def days_in_month(year: int, month: int) -> dict: """ Return the number of days in the given year and month, where month is 1=January to 12=December, and respecting leap years as identified by @@ -85,11 +86,15 @@ def apply_delta(op, time_struct, delta): class EDTFObject(object): """ - Object to attact to a parser to become instantiated when the parser + Object to attach to a parser to become instantiated when the parser completes. """ parser = None + def __init__(self, *args, **kwargs): + errmsg: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{errmsg} is not implemented.") + @classmethod def set_parser(cls, p): cls.parser = p @@ -99,7 +104,7 @@ def set_parser(cls, p): def parse_action(cls, toks): kwargs = toks.asDict() try: - return cls(**kwargs) # replace the token list with the class + return cls(**kwargs) # replace the token list with the class except Exception as e: print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs)) raise e @@ -109,19 +114,12 @@ def parse(cls, s): return cls.parser.parseString(s)[0] def __repr__(self): - return "%s: '%s'" % (type(self).__name__, str(self)) - - def __init__(self, *args, **kwargs): - str = "%s.__init__(*%s, **%s)" % ( - type(self).__name__, - args, kwargs, - ) - raise NotImplementedError("%s is not implemented." 
% str) + return f"{type(self).__name__}: '{str(self)}'" def __str__(self): raise NotImplementedError - def _strict_date(self, lean): + def _strict_date(self, lean: str): raise NotImplementedError def lower_strict(self): @@ -130,7 +128,7 @@ def lower_strict(self): def upper_strict(self): return self._strict_date(lean=LATEST) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean: str): """ Subclasses should override this to pad based on how precise they are. """ @@ -216,41 +214,40 @@ def __le__(self, other): # (* ************************** Level 0 *************************** *) class Date(EDTFObject): + def __init__(self, year=None, month=None, day=None, **kwargs): + for param in ('date', 'lower', 'upper'): + if param in kwargs: + self.__init__(**kwargs[param]) + return + + self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.month = month + self.day = day - def set_year(self, y): + def set_year(self, y: int): if y is None: raise AttributeError("Year must not be None") self._year = y - def get_year(self): + def get_year(self) -> int: return self._year year = property(get_year, set_year) - def set_month(self, m): + def set_month(self, m: Optional[int]): self._month = m - if m == None: + if m is None: self.day = None - def get_month(self): + def get_month(self) -> Optional[int]: return self._month month = property(get_month, set_month) - def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ('date', 'lower', 'upper'): - if param in kwargs: - self.__init__(**kwargs[param]) - return - - self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
- self.month = month - self.day = day - def __str__(self): r = self.year if self.month: - r += "-%s" % self.month + r += f"-{self.month}" if self.day: - r += "-%s" % self.day + r += f"-{self.day}" return r def isoformat(self, default=date.max): @@ -260,14 +257,14 @@ def isoformat(self, default=date.max): int(self.day or default.day), ) - def _precise_year(self, lean): + def _precise_year(self, lean: str): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: return int(re.sub(r'[xu]', r'0', self.year)) else: return int(re.sub(r'[xu]', r'9', self.year)) - def _precise_month(self, lean): + def _precise_month(self, lean: str): if self.month and self.month != "uu": try: return int(self.month) @@ -276,7 +273,7 @@ def _precise_month(self, lean): else: return 1 if lean == EARLIEST else 12 - def _precise_day(self, lean): + def _precise_day(self, lean: str): if not self.day or self.day == 'uu': if lean == EARLIEST: return 1 @@ -343,7 +340,7 @@ def __init__(self, lower, upper): self.upper = upper def __str__(self): - return "%s/%s" % (self.lower, self.upper) + return f"{self.lower}/{self.upper}" def _strict_date(self, lean): if lean == EARLIEST: @@ -416,8 +413,8 @@ def __str__(self): def _strict_date(self, lean): if self.date == "open": return dt_to_struct_time(date.today()) - if self.date =="unknown": - return None # depends on the other date + if self.date == "unknown": + return None # depends on the other date return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -454,12 +451,12 @@ def __init__(self, year): self.year = year def __str__(self): - return "y%s" % self.year + return f"y{self.year}" def _precise_year(self): return int(self.year) - def _strict_date(self, lean): + def _strict_date(self, lean: str): py = self._precise_year() if lean == EARLIEST: return struct_time( @@ -478,30 +475,26 @@ def __init__(self, year, season, **kwargs): self.day = None def __str__(self): - return "%s-%s" % (self.year, self.season) + 
return f"{self.year}-{self.season}" def _precise_month(self, lean): rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] if lean == EARLIEST: return rng[0] - else: - return rng[1] + + return rng[1] # (* ************************** Level 2 *************************** *) class PartialUncertainOrApproximate(Date): - - def set_year(self, y): # Year can be None. - self._year = y - year = property(Date.get_year, set_year) - def __init__( self, year=None, month=None, day=None, - year_ua=False, month_ua = False, day_ua = False, - year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False + year_ua: Optional[UA] = None, month_ua: Optional[UA] = None, + day_ua: Optional[UA] = None, year_month_ua: Optional[UA] = None, + month_day_ua: Optional[UA] = None, ssn=None, + season_ua: Optional[UA] = None, all_ua: Optional[UA] = None ): self.year = year self.month = month @@ -520,56 +513,60 @@ def __init__( self.all_ua = all_ua def __str__(self): - if self.season_ua: - return "%s%s" % (self.season, self.season_ua) + return f"{self.season}{self.season_ua}" if self.year_ua: - y = "%s%s" % (self.year, self.year_ua) + y = f"{self.year}{self.year_ua}" else: y = str(self.year) if self.month_ua: - m = "(%s)%s" % (self.month, self.month_ua) + m = f"({self.month}){self.month_ua}" else: m = str(self.month) if self.day: if self.day_ua: - d = "(%s)%s" % (self.day, self.day_ua) + d = f"({self.day}){self.day_ua}" else: d = str(self.day) else: d = None if self.year_month_ua: # year/month approximate. No brackets needed. 
- ym = "%s-%s%s" % (y, m, self.year_month_ua) + ym = f"{y}-{m}{self.year_month_ua}" if d: - result = "%s-%s" % (ym, d) + result = f"{ym}-{d}" else: result = ym + elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + if self.year_ua: # we don't need the brackets round month and day + result = f"{y}-{m}-{d}{self.month_day_ua}" else: - result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + result = f"{y}-({m}-{d}){self.month_day_ua}" else: if d: - result = "%s-%s-%s" % (y, m, d) + result = f"{y}-{m}-{d}" else: - result = "%s-%s" % (y, m) + result = f"{y}-{m}" if self.all_ua: - result = "(%s)%s" % (result, self.all_ua) + result = f"({result}){self.all_ua}" return result - def _precise_year(self, lean): + def set_year(self, y): # Year can be None. + self._year = y + year = property(Date.get_year, set_year) + + def _precise_year(self, lean: str): if self.season: return self.season._precise_year(lean) return super(PartialUncertainOrApproximate, self)._precise_year(lean) - def _precise_month(self, lean): + def _precise_month(self, lean: str): if self.season: return self.season._precise_month(lean) return super(PartialUncertainOrApproximate, self)._precise_month(lean) @@ -638,7 +635,7 @@ def __init__(self, lower=None, upper=None): self.upper = upper def __str__(self): - return "%s..%s" % (self.lower or '', self.upper or '') + return f"{self.lower or ''}..{self.upper or ''}" class EarlierConsecutives(Consecutives): @@ -650,41 +647,40 @@ class LaterConsecutives(Consecutives): class OneOfASet(EDTFObject): + def __init__(self, *args): + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - def __str__(self): - return "[%s]" % (", ".join([str(o) for o in self.objects])) + return f"[{', '.join([str(o) for o in self.objects])}]" - def 
_strict_date(self, lean): + def _strict_date(self, lean: str): if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) + + return min([x._strict_date(lean) for x in self.objects]) class MultipleDates(EDTFObject): + def __init__(self, *args): + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - def __str__(self): - return "{%s}" % (", ".join([str(o) for o in self.objects])) + return f"{{{', '.join([str(o) for o in self.objects])}}}" def _strict_date(self, lean): if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) + return min([x._strict_date(lean) for x in self.objects]) class MaskedPrecision(Date): @@ -695,12 +691,13 @@ class Level2Interval(Level1Interval): def __init__(self, lower, upper): # Check whether incoming lower/upper values are single-item lists, and # if so take just the first item. This works around what I *think* is a - # bug in the grammer that provides us with single-item lists of + # bug in the grammar that provides us with single-item lists of # `PartialUncertainOrApproximate` items for lower/upper values. 
if isinstance(lower, (tuple, list)) and len(lower) == 1: self.lower = lower[0] else: self.lower = lower + if isinstance(lower, (tuple, list)) and len(upper) == 1: self.upper = upper[0] else: @@ -718,7 +715,7 @@ def _precise_year(self): def get_year(self): if self.precision: - return '%se%sp%s' % (self.base, self.exponent, self.precision) + return f'{self.base}e{self.exponent}p{self.precision}' else: - return '%se%s' % (self.base, self.exponent) + return f'{self.base}e{self.exponent}' year = property(get_year) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index f9dde42..77c2ad3 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -3,10 +3,11 @@ from datetime import date from time import struct_time +from pyparsing import ParseException + from edtf.parser.grammar import parse_edtf as parse from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ TIME_EMPTY_EXTRAS -from edtf.parser.edtf_exceptions import EDTFParseException # Example object types and attributes. # the first item in each tuple is the input EDTF string, and expected parse result. 
@@ -192,17 +193,30 @@ None, '', 'not a edtf string', - 'y17e7-12-26', # not implemented - '2016-13-08', # wrong day order - '2016-02-39', # out of range + 'y17e7-12-26', # not implemented + '2016-13-08', # wrong day order + '2016-02-39', # out of range '-0000-01-01', # negative zero year ) class TestParsing(unittest.TestCase): + def iso_to_struct_time(self, iso_date): + """ Convert YYYY-mm-dd date strings to time structs """ + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time( + [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + def test_non_parsing(self): for i in BAD_EXAMPLES: - self.assertRaises(EDTFParseException, parse, i) + self.assertRaises(ParseException, parse, i) def test_date_values(self): """ @@ -217,13 +231,15 @@ def test_date_values(self): else: o = i - sys.stdout.write("parsing '%s'" % i) + sys.stdout.write(f"parsing '{i}'") f = parse(i) - sys.stdout.write(" => %s()\n" % type(f).__name__) + sys.stdout.write(f" => {type(f).__name__}()\n") self.assertIsInstance(f, EDTFObject) - self.assertEqual(str(f), o) + self.assertEqual(str(f), o, msg=f"Testing {i}") - if len(e) == 5: + if len(e) == 1: + continue + elif len(e) == 5: expected_lower_strict = e[1] expected_upper_strict = e[2] expected_lower_fuzzy = e[3] @@ -243,33 +259,21 @@ def test_date_values(self): expected_upper_strict = e[1] expected_lower_fuzzy = e[1] expected_upper_fuzzy = e[1] - if len(e) == 1: + else: + print(f"Unexpected value {e}; skipping.") continue - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - # Convert string date representations into 
`struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = iso_to_struct_time(expected_upper_strict) - expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy) - expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy) + exp_lower_str = self.iso_to_struct_time(expected_lower_strict) + exp_upper_str = self.iso_to_struct_time(expected_upper_strict) + exp_lower_fuzz = self.iso_to_struct_time(expected_lower_fuzzy) + exp_upper_fuzz = self.iso_to_struct_time(expected_upper_fuzzy) try: - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy) - self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy) + self.assertEqual(f.lower_strict(), exp_lower_str) + self.assertEqual(f.upper_strict(), exp_upper_str) + self.assertEqual(f.lower_fuzzy(), exp_lower_fuzz) + self.assertEqual(f.upper_fuzzy(), exp_upper_fuzz) except Exception as x: # Write to stdout for manual debugging, I guess sys.stdout.write(str(x)) From 06ab934befb7a665301587134794ddbc50b60964 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Wed, 24 Jul 2024 11:18:51 +0200 Subject: [PATCH 071/135] Further optimizations --- edtf/natlang/en.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 8cb72c4..d7d7b8d 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,4 +1,5 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" +import functools from datetime import datetime from typing import Optional @@ -40,15 +41,12 @@ MENTIONS_MONTH = re.compile(r'\bmonth\b.+(in|during)\b') MENTIONS_DAY = re.compile(r'\bday\b.+(in|during)\b') - - # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. 
-REJECT_RULES = ( - re.compile(r'.*dynasty.*'), # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -) +REJECT_RULES = re.compile(r'.*dynasty.*') # Don't parse '23rd Dynasty' to 'uuuu-uu-23' +@functools.lru_cache() def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. @@ -123,7 +121,8 @@ def text_to_edtf(text: str) -> Optional[str]: return result -def text_to_edtf_date(text) -> Optional[str]: +@functools.lru_cache() +def text_to_edtf_date(text: str) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. @@ -137,9 +136,8 @@ def text_to_edtf_date(text) -> Optional[str]: t = text.lower() result = '' - for reject_re in REJECT_RULES: - if re.match(reject_re, t): - return None + if re.match(REJECT_RULES, t): + return None # matches on '1800s'. Needs to happen before is_decade. could_be_century: list = re.findall(MIGHT_BE_CENTURY, t) @@ -185,7 +183,6 @@ def text_to_edtf_date(text) -> Optional[str]: else: # try dateutil.parse - try: # parse twice, using different defaults to see what was # parsed and what was guessed. From bc0ec12e2d7e2acc580fd66fd6401d3f1d5ad9f8 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 28 Jul 2024 13:23:48 +0200 Subject: [PATCH 072/135] fix: fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7542c36..c0169db 100644 --- a/README.md +++ b/README.md @@ -253,7 +253,7 @@ The library includes a basic English natural language parser (it's not yet smart '1979-08~' ``` -Note that the result is a string, not an `ETDFObject`. +Note that the result is a string, not an `EDTFObject`. 
The parser can parse strings such as: From 934e0bfcd65f84d9abb001ac59a6a26fe289cbc1 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sun, 28 Jul 2024 13:30:44 +0200 Subject: [PATCH 073/135] fix: fix typos --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c0169db..2c1f34c 100644 --- a/README.md +++ b/README.md @@ -331,7 +331,7 @@ Since EDTF dates are often regions, and often imprecise, we need to use a few di ### `struct_time` date representation -Because Python's `datetime` module does not support dates out side the range 1 AD to 9999 AD we return dates as `time.struct_time` objects by default instead of the `datetime.date` or `datetime.datetime` objects you might expect. +Because Python's `datetime` module does not support dates outside the range 1 AD to 9999 AD we return dates as `time.struct_time` objects by default instead of the `datetime.date` or `datetime.datetime` objects you might expect. The `struct_time` representation is more difficult to work with, but can be sorted as-is which is the primary use-case, and can be converted relatively easily to `date` or `datetime` objects (provided the year is within 1 to 9999 AD) or to date objects in more flexible libraries like [astropy.time](http://docs.astropy.org/en/stable/time/index.html) for years outside these bounds. @@ -372,7 +372,7 @@ These dates indicate the earliest and latest dates that are __possible__ in the These values are useful for filtering results - i.e. testing which EDTF dates might conceivably fall into, or overlap, a desired date range. -The fuzzy dates are derived from the strict dates, plus or minus a level of padding that depends on how precise the date specfication is. For the case of approximate or uncertain dates, we (arbitrarily) pad the ostensible range by 100% of the uncertain timescale, or by a 12 weeks in the case of seasons. That is, if a date is approximate at the month scale, it is padded by a month. 
If it is approximate at the year scale, it is padded by a year: +The fuzzy dates are derived from the strict dates, plus or minus a level of padding that depends on how precise the date specification is. For the case of approximate or uncertain dates, we (arbitrarily) pad the ostensible range by 100% of the uncertain timescale, or by a 12 weeks in the case of seasons. That is, if a date is approximate at the month scale, it is padded by a month. If it is approximate at the year scale, it is padded by a year: ```python >>> e = parse_edtf('1912-04~') @@ -400,7 +400,7 @@ EDTF objects support properties that provide an overview of how the object is qu * `.is_approximate (~)` * `.is_uncertain_and_approximate (%)` -These properties represent whether the any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and aproximate. +These properties represent whether the any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and approximate. 
```python >>> parse_edtf("2006-06-11") From 21f73c5340d4c2aaa41f366524b8b902cf54bff8 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Wed, 31 Jul 2024 15:22:21 +0200 Subject: [PATCH 074/135] style: adding type hints --- edtf/convert.py | 13 ++++++------- edtf/jdutil.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/edtf/convert.py b/edtf/convert.py index a294462..581b76a 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -39,11 +39,10 @@ def dt_to_struct_time(dt): return struct_time( [dt.year, dt.month, dt.day] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS ) - else: - raise NotImplementedError(f"Cannot convert {type(dt)} to `struct_time`") + raise NotImplementedError(f"Cannot convert {type(dt)} to `struct_time`") -def struct_time_to_date(st): +def struct_time_to_date(st: struct_time) -> date: """ Return a `datetime.date` representing the provided `struct_time. @@ -52,7 +51,7 @@ def struct_time_to_date(st): return date(*st[:3]) -def struct_time_to_datetime(st): +def struct_time_to_datetime(st: struct_time) -> datetime: """ Return a `datetime.datetime` representing the provided `struct_time. @@ -61,7 +60,7 @@ def struct_time_to_datetime(st): return datetime(*st[:6]) -def trim_struct_time(st, strip_time=False): +def trim_struct_time(st: struct_time, strip_time=False) -> struct_time: """ Return a `struct_time` based on the one provided but with the extra fields `tm_wday`, `tm_yday`, and `tm_isdst` reset to default values. @@ -75,7 +74,7 @@ def trim_struct_time(st, strip_time=False): return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) -def struct_time_to_jd(st): +def struct_time_to_jd(st: struct_time) -> float: """ Return a float number representing the Julian Date for the given `struct_time`. @@ -91,7 +90,7 @@ def struct_time_to_jd(st): return jdutil.date_to_jd(year, month, day) -def jd_to_struct_time(jd): +def jd_to_struct_time(jd: float) -> struct_time: """ Return a `struct_time` converted from a Julian Date float number. 
diff --git a/edtf/jdutil.py b/edtf/jdutil.py index 16cd312..7c0a3bd 100644 --- a/edtf/jdutil.py +++ b/edtf/jdutil.py @@ -18,7 +18,7 @@ # time deltas if one date is from before 10-15-1582. -def mjd_to_jd(mjd): +def mjd_to_jd(mjd: float) -> float: """ Convert Modified Julian Day to Julian Day. @@ -37,7 +37,7 @@ def mjd_to_jd(mjd): return mjd + 2400000.5 -def jd_to_mjd(jd): +def jd_to_mjd(jd: float) -> float: """ Convert Julian Day to Modified Julian Day @@ -55,7 +55,7 @@ def jd_to_mjd(jd): return jd - 2400000.5 -def date_to_jd(year, month, day): +def date_to_jd(year: int, month: int, day: float) -> float: """ Convert a date to Julian Day. @@ -117,7 +117,7 @@ def date_to_jd(year, month, day): return jd -def jd_to_date(jd): +def jd_to_date(jd: float) -> tuple: """ Convert Julian Day to date. @@ -175,7 +175,7 @@ def jd_to_date(jd): return year, month, day -def hmsm_to_days(hour=0, min=0, sec=0, micro=0): +def hmsm_to_days(hour: int = 0, min: int = 0, sec: int = 0, micro: int = 0) -> float: """ Convert hours, minutes, seconds, and microseconds to fractional days. @@ -262,7 +262,7 @@ def days_to_hmsm(days): return int(hour), int(min), int(sec), int(micro) -def datetime_to_jd(date): +def datetime_to_jd(date: dt.datetime) -> float: """ Convert a `datetime.datetime` object to Julian Day. @@ -291,7 +291,7 @@ def datetime_to_jd(date): return date_to_jd(date.year, date.month, days) -def jd_to_datetime(jd): +def jd_to_datetime(jd: float) -> dt.datetime: """ Convert a Julian Day to an `jdutil.datetime` object. @@ -321,7 +321,7 @@ def jd_to_datetime(jd): return datetime(year, month, day, hour, min, sec, micro) -def timedelta_to_days(td): +def timedelta_to_days(td: dt.timedelta) -> float: """ Convert a `datetime.timedelta` object to a total number of days. 
From 2e69104706f3c03d5f59faf56fa3df98daa0dc33 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Wed, 31 Jul 2024 15:29:52 +0200 Subject: [PATCH 075/135] style: adding one more hint --- edtf/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/convert.py b/edtf/convert.py index 581b76a..ee03f36 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -60,7 +60,7 @@ def struct_time_to_datetime(st: struct_time) -> datetime: return datetime(*st[:6]) -def trim_struct_time(st: struct_time, strip_time=False) -> struct_time: +def trim_struct_time(st: struct_time, strip_time: bool = False) -> struct_time: """ Return a `struct_time` based on the one provided but with the extra fields `tm_wday`, `tm_yday`, and `tm_isdst` reset to default values. From 130898d751a4f5284080506916f5935fdb2130d2 Mon Sep 17 00:00:00 2001 From: aweakley Date: Thu, 1 Aug 2024 23:10:00 +1000 Subject: [PATCH 076/135] Delete dummy workflow No longer required now that actions run in the repo --- .github/workflows/dummy.yml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 .github/workflows/dummy.yml diff --git a/.github/workflows/dummy.yml b/.github/workflows/dummy.yml deleted file mode 100644 index af2aeba..0000000 --- a/.github/workflows/dummy.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: dummy-github-action - -on: [push] -jobs: - print-message: - runs-on: ubuntu-latest - steps: - - run: echo "Dummy Action to initialise Actions for the repository" From c9cb56fe7dfcfe3f55ee981106bce7e73e7b7554 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 12 Aug 2024 14:27:41 +0200 Subject: [PATCH 077/135] Update gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index ba74660..4d58675 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,5 @@ docs/_build/ # PyBuilder target/ +.idea +.DS_Store From 9e51373eea989f4ea306408138b31ce53bdef1ab Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 13 Aug 2024 15:01:47 
+0200 Subject: [PATCH 078/135] Black formatting, updates --- edtf/natlang/en.py | 101 +++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index d7d7b8d..191199e 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -14,36 +14,42 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = re.compile(r'(-?)([\du])([\dxu])([\dxu])([\dxu])') -LONG_YEAR_RE = re.compile(r'y(-?)([1-9]\d\d\d\d+)') -CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?') -CENTURY_RANGE = re.compile(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]') -CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)') -ONE_DIGIT_PARTIAL_FIRST = re.compile(r'\d\D\b') -TWO_DIGIT_PARTIAL_FIRST = re.compile(r'\d\d\b') -PARTIAL_CHECK = re.compile(r'\b\d\d\d\d$') +SHORT_YEAR_RE = re.compile(r"(-?)([\du])([\dxu])([\dxu])([\dxu])") +LONG_YEAR_RE = re.compile(r"y(-?)([1-9]\d\d\d\d+)") +CENTURY_RE = re.compile(r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?") +CENTURY_RANGE = re.compile(r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]") +CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)") +ONE_DIGIT_PARTIAL_FIRST = re.compile(r"\d\D\b") +TWO_DIGIT_PARTIAL_FIRST = re.compile(r"\d\d\b") +PARTIAL_CHECK = re.compile(r"\b\d\d\d\d$") SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)") BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b") AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b") -APPROX_CHECK = re.compile(r'\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)') +APPROX_CHECK = re.compile( + r"\b(?:ca?\.? 
?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)" +) UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)") -UNCERTAIN_REPL = re.compile(r'(\d{4})\?') -MIGHT_BE_CENTURY = re.compile(r'(\d{2}00)s') -MIGHT_BE_DECADE = re.compile(r'(\d{3}0)s') +UNCERTAIN_REPL = re.compile(r"(\d{4})\?") +MIGHT_BE_CENTURY = re.compile(r"(\d{2}00)s") +MIGHT_BE_DECADE = re.compile(r"(\d{3}0)s") -APPROX_CENTURY_RE = re.compile(r'\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?') -UNCERTAIN_CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?') +APPROX_CENTURY_RE = re.compile( + r"\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +) +UNCERTAIN_CENTURY_RE = re.compile( + r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?" +) -APPROX_CE_RE = re.compile(r'\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)') -UNCERTAIN_CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)\?') +APPROX_CE_RE = re.compile(r"\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)") +UNCERTAIN_CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)\?") -MENTIONS_YEAR = re.compile(r'\byear\b.+(in|during)\b') -MENTIONS_MONTH = re.compile(r'\bmonth\b.+(in|during)\b') -MENTIONS_DAY = re.compile(r'\bday\b.+(in|during)\b') +MENTIONS_YEAR = re.compile(r"\byear\b.+(in|during)\b") +MENTIONS_MONTH = re.compile(r"\bmonth\b.+(in|during)\b") +MENTIONS_DAY = re.compile(r"\bday\b.+(in|during)\b") # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. 
-REJECT_RULES = re.compile(r'.*dynasty.*') # Don't parse '23rd Dynasty' to 'uuuu-uu-23' +REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' @functools.lru_cache() @@ -57,16 +63,16 @@ def text_to_edtf(text: str) -> Optional[str]: t = text.lower() # try parsing the whole thing - result = text_to_edtf_date(t) + result: Optional[str] = text_to_edtf_date(t) if not result: # split by list delims and move fwd with the first thing that returns a non-empty string. # TODO: assemble multiple dates into a {} or [] structure. for split in [",", ";", "or"]: for list_item in t.split(split): - # try parsing as an interval - split by '-' - toks = list_item.split("-") + toks: list[str] = list_item.split("-") + if len(toks) == 2: d1 = toks[0].strip() d2 = toks[1].strip() @@ -74,10 +80,16 @@ def text_to_edtf(text: str) -> Optional[str]: # match looks from the beginning of the string, search # looks anywhere. - if re.match(ONE_DIGIT_PARTIAL_FIRST, d2): # 1-digit year partial e.g. 1868-9 - if re.search(PARTIAL_CHECK, d1): # TODO: evaluate it and see if it's a year + if re.match( + ONE_DIGIT_PARTIAL_FIRST, d2 + ): # 1-digit year partial e.g. 1868-9 + if re.search( + PARTIAL_CHECK, d1 + ): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(TWO_DIGIT_PARTIAL_FIRST, d2): # 2-digit year partial e.g. 1809-10 + elif re.match( + TWO_DIGIT_PARTIAL_FIRST, d2 + ): # 2-digit year partial e.g. 1809-10 if re.search(PARTIAL_CHECK, d1): d2 = d1[-4:-2] + d2 else: @@ -134,7 +146,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: return None t = text.lower() - result = '' + result: str = "" if re.match(REJECT_RULES, t): return None @@ -143,7 +155,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: could_be_century: list = re.findall(MIGHT_BE_CENTURY, t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" 
- t, is_decade = re.subn(MIGHT_BE_DECADE, r'\1', t) + t, is_decade = re.subn(MIGHT_BE_DECADE, r"\1", t) # detect approximation signifiers # a few 'circa' abbreviations just before the year @@ -151,7 +163,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: # the word 'circa' anywhere # detect uncertainty signifiers - t, is_uncertain = re.subn(UNCERTAIN_REPL, r'\1', t) + t, is_uncertain = re.subn(UNCERTAIN_REPL, r"\1", t) is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t) # detect century forms @@ -191,7 +203,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: dayfirst=appsettings.DAY_FIRST, yearfirst=False, fuzzy=True, # force a match, even if it's default date - default=DEFAULT_DATE_1 + default=DEFAULT_DATE_1, ) dt2 = parse( @@ -199,14 +211,13 @@ def text_to_edtf_date(text: str) -> Optional[str]: dayfirst=appsettings.DAY_FIRST, yearfirst=False, fuzzy=True, # force a match, even if it's default date - default=DEFAULT_DATE_2 + default=DEFAULT_DATE_2, ) except ValueError: return None - if dt1.date() == DEFAULT_DATE_1.date() and \ - dt2.date() == DEFAULT_DATE_2.date(): + if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. return None @@ -223,12 +234,12 @@ def text_to_edtf_date(text: str) -> Optional[str]: # approximate/uncertain markers to decide whether we treat it as # a century or a decade. if i == 2 and could_be_century and not (is_approximate or is_uncertain): - result += 'x' + result += "x" elif i == 3 and is_decade: if mentions_year: - result += 'u' # year precision + result += "X" # year precision else: - result += 'x' # decade precision + result += "x" # decade precision elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default @@ -236,12 +247,12 @@ def text_to_edtf_date(text: str) -> Optional[str]: else: # different values were produced, meaning that it's likely # a default. 
Use 'unspecified' - result += "u" + result += "X" # strip off unknown chars from end of string - except the first 4 for i in reversed(range(len(result))): - if result[i] not in ('u', 'x', '-'): + if result[i] not in ("X", "-"): smallest_length = 4 if mentions_month: @@ -265,14 +276,16 @@ def text_to_edtf_date(text: str) -> Optional[str]: # end dateutil post-parsing - if is_uncertain: - result += "?" - - if is_approximate: - result += "~" + if is_uncertain and is_approximate: + result += "%" + else: + if is_uncertain: + result += "?" + if is_approximate: + result += "~" # weed out bad parses - if result.startswith("uu-uu"): + if result.startswith("XX-XX"): return None return result From 1aa53cfb2d4e0a2a3c284ec20db60f841b88a7f9 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 13 Aug 2024 15:03:16 +0200 Subject: [PATCH 079/135] Update imports --- edtf/natlang/en.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 191199e..ba192e8 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,12 +1,12 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" import functools +import re from datetime import datetime from typing import Optional from dateutil.parser import parse -import re -from edtf import appsettings +from edtf import appsettings # two dates where every digit of an ISO date representation is different, # and one is in the past and one is in the future. 
From 8c4f9685bc31224bcd0efcf811485f2e3f34e292 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 13 Aug 2024 16:48:01 +0200 Subject: [PATCH 080/135] Merge fixes --- edtf/natlang/en.py | 18 ++++++++++-------- edtf/parser/parser_classes.py | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index ba192e8..49b04f3 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -4,7 +4,7 @@ from datetime import datetime from typing import Optional -from dateutil.parser import parse +from dateutil.parser import ParserError, parse from edtf import appsettings @@ -126,9 +126,9 @@ def text_to_edtf(text: str) -> Optional[str]: is_after = re.findall(AFTER_CHECK, t) if is_before: - result = f"unknown/{result}" + result = f"/{result}" elif is_after: - result = f"{result}/unknown" + result = f"{result}/" return result @@ -172,7 +172,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dxx" % (int(is_century[0][0]) - 1,) + result = "%02dXX" % (int(is_century[0][0]) - 1,) is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t) is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t) @@ -214,8 +214,10 @@ def text_to_edtf_date(text: str) -> Optional[str]: default=DEFAULT_DATE_2, ) - except ValueError: - return None + except ParserError: + return + except Exception: + return if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. @@ -234,12 +236,12 @@ def text_to_edtf_date(text: str) -> Optional[str]: # approximate/uncertain markers to decide whether we treat it as # a century or a decade. 
if i == 2 and could_be_century and not (is_approximate or is_uncertain): - result += "x" + result += "X" elif i == 3 and is_decade: if mentions_year: result += "X" # year precision else: - result += "x" # decade precision + result += "X" # decade precision elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index d103660..eada1f9 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -4,6 +4,7 @@ from datetime import date, datetime from operator import add, sub from time import struct_time +from typing import Optional from dateutil.relativedelta import relativedelta From 6f08bce95cb583f2825353cbe8ae6a1de1c47df7 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 13 Aug 2024 16:55:59 +0200 Subject: [PATCH 081/135] ruff formatting --- edtf/natlang/en.py | 5 +++-- edtf/parser/parser_classes.py | 9 ++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 49b04f3..97230db 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,4 +1,5 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" + import functools import re from datetime import datetime @@ -52,7 +53,7 @@ REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -@functools.lru_cache() +@functools.lru_cache def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. @@ -133,7 +134,7 @@ def text_to_edtf(text: str) -> Optional[str]: return result -@functools.lru_cache() +@functools.lru_cache def text_to_edtf_date(text: str) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. 
diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index eada1f9..ad690fb 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -98,10 +98,6 @@ class EDTFObject: parser = None - def __init__(self, *args, **kwargs): - errmsg: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" - raise NotImplementedError(f"{errmsg} is not implemented.") - @classmethod def set_parser(cls, p): cls.parser = p @@ -288,6 +284,7 @@ def set_year(self, y: int): def get_year(self) -> int: return self._year + year = property(get_year, set_year) def set_month(self, m: Optional[int]): @@ -297,6 +294,7 @@ def set_month(self, m: Optional[int]): def get_month(self) -> Optional[int]: return self._month + month = property(get_month, set_month) def __str__(self): @@ -932,8 +930,9 @@ def __str__(self): return result - def set_year(self, y): # Year can be None. + def set_year(self, y): # Year can be None. self._year = y + year = property(Date.get_year, set_year) def _precise_year(self, lean: str): From 973ccf4cabcd21cc0d7af5e2d1c8bb86992c65e3 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 13 Aug 2024 17:27:15 +0200 Subject: [PATCH 082/135] Remove accidentally committed poetry file --- poetry.lock | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 poetry.lock diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index c4b40b6..0000000 --- a/poetry.lock +++ /dev/null @@ -1,45 +0,0 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
- -[[package]] -name = "pyparsing" -version = "3.1.2" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -optional = false -python-versions = ">=3.6.8" -files = [ - {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, - {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, -] - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[metadata] -lock-version = "2.0" -python-versions = "^3.9" -content-hash = "e6be32f86f1a6af0695f6846b57ed289e015b5634c7f574c45800095a84e2200" From ee450a55a74069daf44da6c476a823dc879f6e78 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Wed, 14 Aug 2024 13:14:07 +0200 Subject: [PATCH 083/135] Fixed: f-string formatting Also added Andrew Hankinson to the authors list in pyproject.toml --- edtf/natlang/en.py | 2 +- pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git 
a/edtf/natlang/en.py b/edtf/natlang/en.py index 97230db..d57bb82 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -104,7 +104,7 @@ def text_to_edtf(text: str) -> Optional[str]: r2 = text_to_edtf_date(d2) if r1 and r2: - result = r1 + "/" + r2 + result = f"{r1}/{r2}" return result # is it an either/or year "1838/1862" - that has a different diff --git a/pyproject.toml b/pyproject.toml index b48c3f7..2d050c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,8 @@ authors = [ { name = "Mark Finger" }, { name = "Sabine Müller" }, { name = "Cole Crawford" }, - { name = "Klaus Rettinghaus" } + { name = "Klaus Rettinghaus" }, + { name = "Andrew Hankinson", email = "andrew.hankinson@rism.digital" }, ] maintainers = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au" } From 6774e65c7816dc024ab3d62be4e455e7533805e0 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 15 Aug 2024 07:59:35 +1000 Subject: [PATCH 084/135] Remove extra paragraph in readme --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 2c1f34c..8b2324e 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,6 @@ An implementation of EDTF format in Python, together with utility functions for See for the final draft specification. 
-This project is based on python-edtf and was developed to include the newest specification - ## To install ```shell From 46bdce6bd97956088e932ba1ca359bac71ca3f06 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 12 Sep 2024 15:40:50 +0200 Subject: [PATCH 085/135] Fixed: return type of statement --- edtf/parser/parser_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index ad690fb..c334ee9 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -28,7 +28,7 @@ PRECISION_DAY = "day" -def days_in_month(year: int, month: int) -> dict: +def days_in_month(year: int, month: int) -> int: """ Return the number of days in the given year and month, where month is 1=January to 12=December, and respecting leap years as identified by From 656f8ad900ddd3d02ead2fce2eb9575c7d049025 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 12 Sep 2024 15:44:38 +0200 Subject: [PATCH 086/135] Updated parser classes I've had a pass at the Parser Classes file, but there are a lot of problems still to be sorted out. I've added return types and argument types wherever it makes sense. The "UncertainOrApproximate" class is a hot mess. There are boolean values with property and method calls associated with them, and I would be surprised if it actually works. However, it doesn't seem to be tested or implemented, so I can't figure out where to go from here. 
--- edtf/parser/parser_classes.py | 192 +++++++++++++++++----------------- 1 file changed, 94 insertions(+), 98 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index c334ee9..eb9fac5 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -126,7 +126,7 @@ def __init__(self, *args, **kwargs): def __str__(self): raise NotImplementedError - def _strict_date(self, lean: str): + def _strict_date(self, lean: str = EARLIEST): raise NotImplementedError def lower_strict(self): @@ -141,30 +141,31 @@ def _get_fuzzy_padding(self, lean: str): """ return relativedelta(0) - def get_is_approximate(self): + def get_is_approximate(self) -> bool: return getattr(self, "_is_approximate", False) - def set_is_approximate(self, val): + def set_is_approximate(self, val: bool) -> None: self._is_approximate = val - is_approximate = property(get_is_approximate, set_is_approximate) + is_approximate = property(get_is_approximate, set_is_approximate) # noqa - def get_is_uncertain(self): + def get_is_uncertain(self) -> bool: return getattr(self, "_is_uncertain", False) - def set_is_uncertain(self, val): + def set_is_uncertain(self, val: bool) -> None: self._is_uncertain = val - is_uncertain = property(get_is_uncertain, set_is_uncertain) + is_uncertain = property(get_is_uncertain, set_is_uncertain) # noqa - def get_is_uncertain_and_approximate(self): + def get_is_uncertain_and_approximate(self) -> bool: return getattr(self, "_uncertain_and_approximate", False) - def set_is_uncertain_and_approximate(self, val): + def set_is_uncertain_and_approximate(self, val: bool) -> None: self._uncertain_and_approximate = val is_uncertain_and_approximate = property( - get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + get_is_uncertain_and_approximate, # noqa + set_is_uncertain_and_approximate, # noqa ) def lower_fuzzy(self): @@ -242,76 +243,71 @@ def __le__(self, other): class Date(EDTFObject): - def set_year(self, y): - if y is 
None: - raise AttributeError("Year must not be None") - self._year = y - - def get_year(self): - return self._year - - year = property(get_year, set_year) - - def set_month(self, m): - self._month = m - if m is None: - self.day = None - - def get_month(self): - return self._month - - month = property(get_month, set_month) - - def __init__( - self, year=None, month=None, day=None, significant_digits=None, **kwargs + def __init__( # noqa + self, + year: Optional[str] = None, + month: Optional[str] = None, + day: Optional[str] = None, + significant_digits=None, + **kwargs, ): for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) return - self.year = year # Year is required, but sometimes passed in as a 'date' dict. - self.month = month - self.day = day + self._year = year # Year is required, but sometimes passed in as a 'date' dict. + self._month = month + self._day = day self.significant_digits = ( int(significant_digits) if significant_digits else None ) - def set_year(self, y: int): + def set_year(self, y: str): if y is None: raise AttributeError("Year must not be None") self._year = y - def get_year(self) -> int: + def get_year(self) -> str: return self._year - year = property(get_year, set_year) + year = property(get_year, set_year) # noqa - def set_month(self, m: Optional[int]): + def set_month(self, m: Optional[str]): self._month = m if m is None: - self.day = None + self._day = None - def get_month(self) -> Optional[int]: + def get_month(self) -> Optional[str]: return self._month - month = property(get_month, set_month) + month = property(get_month, set_month) # noqa + + def set_day(self, d: Optional[str]): + self._day = d + if d is None: + self._day = None + + def get_day(self) -> Optional[str]: + return self._day + + day = property(get_day, set_day) # noqa def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + r = self._year + if self._month: + r += 
f"-{self._month}" + if self._day: + r += f"-{self._day}" if self.significant_digits: r += f"S{self.significant_digits}" return r def isoformat(self, default=date.max): return "%s-%02d-%02d" % ( - self.year, - int(self.month or default.month), - int(self.day or default.day), + self._year, + int(self._month or default.month), + int(self._day or default.day), ) def lower_fuzzy(self): @@ -320,10 +316,10 @@ def lower_fuzzy(self): sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) ) else: - total_digits = len(self.year) + total_digits = len(self._year) insignificant_digits = total_digits - self.significant_digits lower_year = ( - int(self.year) + int(self._year) // (10**insignificant_digits) * (10**insignificant_digits) ) @@ -335,9 +331,9 @@ def upper_fuzzy(self): add, self.upper_strict(), self._get_fuzzy_padding(LATEST) ) else: - total_digits = len(self.year) + total_digits = len(self._year) insignificant_digits = total_digits - self.significant_digits - upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( + upper_year = (int(self._year) // (10**insignificant_digits) + 1) * ( 10**insignificant_digits ) - 1 return struct_time( @@ -347,23 +343,23 @@ def upper_fuzzy(self): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r"X", r"0", self.year)) + return int(re.sub(r"X", r"0", self._year)) else: - return int(re.sub(r"X", r"9", self.year)) + return int(re.sub(r"X", r"9", self._year)) def _precise_month(self, lean): - if self.month and self.month != "XX": + if self._month and self._month != "XX": try: - return int(self.month) + return int(self._month) except ValueError as err: raise ValueError( - f"Couldn't convert {self.month} to int (in {self})" + f"Couldn't convert {self._month} to int (in {self})" ) from err else: return 1 if lean == EARLIEST else 12 def _precise_day(self, lean): - if not self.day or self.day == "XX": + if not self._day or self._day == "XX": 
if lean == EARLIEST: return 1 else: @@ -371,9 +367,9 @@ def _precise_day(self, lean): self._precise_year(LATEST), self._precise_month(LATEST) ) else: - return int(self.day) + return int(self._day) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): """ Return a `time.struct_time` representation of the date. """ @@ -389,9 +385,9 @@ def _strict_date(self, lean): @property def precision(self): - if self.day: + if self._day: return PRECISION_DAY - if self.month: + if self._month: return PRECISION_MONTH return PRECISION_YEAR @@ -400,7 +396,7 @@ def estimated(self): class DateAndTime(EDTFObject): - def __init__(self, date, time): + def __init__(self, date, time): # noqa: super raises not implemented self.date = date self.time = time @@ -410,7 +406,7 @@ def __str__(self): def isoformat(self): return self.date.isoformat() + "T" + self.time - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): return self.date._strict_date(lean) def __eq__(self, other): @@ -429,14 +425,14 @@ def __ne__(self, other): class Interval(EDTFObject): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa: super() raises not implemented self.lower = lower self.upper = upper def __str__(self): return f"{self.lower}/{self.upper}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): if lean == EARLIEST: r = self.lower._strict_date(lean) else: @@ -459,7 +455,7 @@ def parse_action(cls, toks): args = toks.asList() return cls(*args) - def __init__(self, *args): + def __init__(self, *args): # noqa: super() raises not implemented if len(args) != 1: raise AssertionError("UA must have exactly one argument") ua = args[0] @@ -488,7 +484,7 @@ def _get_multiplier(self): class UncertainOrApproximate(EDTFObject): - def __init__(self, date, ua): + def __init__(self, date, ua): # noqa: super() raises not implemented self.date = date self.ua = ua self.is_uncertain = ua.is_uncertain if ua else False @@ -503,7 +499,7 
@@ def __str__(self): else: return str(self.date) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -532,7 +528,7 @@ def _get_fuzzy_padding(self, lean): class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): + def __init__(self, sectionOpen=False, other_section_element=None): # noqa: super() raises not implemented if sectionOpen: self.is_open = True self.is_unknown = False @@ -547,14 +543,17 @@ def __str__(self): else: return ".." - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): + if lean not in (EARLIEST, LATEST): + raise ValueError("lean must be one of EARLIEST or LATEST") + if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: return -math.inf - else: + elif lean == LATEST: if self.is_unknown: lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) @@ -717,7 +716,7 @@ def precision(self): class Level1Interval(Interval): - def __init__(self, lower=None, upper=None): + def __init__(self, lower: Optional[dict] = None, upper: Optional[dict] = None): # noqa if lower: if lower["date"] == "..": self.lower = UnspecifiedIntervalSection( @@ -740,8 +739,10 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) - self.is_approximate = self.lower.is_approximate or self.upper.is_approximate - self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_approximate: bool = ( + self.lower.is_approximate or self.upper.is_approximate + ) + self.is_uncertain: bool = self.lower.is_uncertain or self.upper.is_uncertain self.is_uncertain_and_approximate = ( self.lower.is_uncertain_and_approximate or self.upper.is_uncertain_and_approximate @@ -755,7 +756,7 @@ def 
_get_fuzzy_padding(self, lean): class LongYear(EDTFObject): - def __init__(self, year, significant_digits=None): + def __init__(self, year: str, significant_digits: Optional[str] = None): # noqa self.year = year self.significant_digits = ( int(significant_digits) if significant_digits else None @@ -770,7 +771,7 @@ def __str__(self): def _precise_year(self): return int(self.year) - def _strict_date(self, lean: str): + def _strict_date(self, lean: str = EARLIEST): py = self._precise_year() if lean == EARLIEST: return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) @@ -818,7 +819,7 @@ def upper_fuzzy(self): class Season(Date): - def __init__(self, year, season, **kwargs): + def __init__(self, year, season, **kwargs): # noqa self.year = year self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited @@ -840,12 +841,7 @@ def _precise_month(self, lean): class PartialUncertainOrApproximate(Date): - def set_year(self, y): # Year can be None. - self._year = y - - year = property(Date.get_year, set_year) - - def __init__( + def __init__( # noqa self, year=None, month=None, @@ -933,7 +929,7 @@ def __str__(self): def set_year(self, y): # Year can be None. 
self._year = y - year = property(Date.get_year, set_year) + year = property(Date.get_year, set_year) # noqa def _precise_year(self, lean: str): if self.season: @@ -1018,7 +1014,7 @@ class PartialUnspecified(Unspecified): class Consecutives(Interval): # Treating Consecutive ranges as intervals where one bound is optional - def __init__(self, lower=None, upper=None): + def __init__(self, lower=None, upper=None): # noqa if lower and not isinstance(lower, EDTFObject): self.lower = Date.parse(lower) else: @@ -1044,7 +1040,7 @@ def __str__(self): class OneOfASet(EDTFObject): - def __init__(self, *args): + def __init__(self, *args): # noqa self.objects = args @classmethod @@ -1053,9 +1049,9 @@ def parse_action(cls, toks): return cls(*args) def __str__(self): - return "[{}]".format(", ".join([str(o) for o in self.objects])) + return f"[{", ".join([str(o) for o in self.objects])}]" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: @@ -1077,7 +1073,7 @@ def _strict_date(self, lean): class MultipleDates(EDTFObject): - def __init__(self, *args): + def __init__(self, *args): # noqa self.objects = args @classmethod @@ -1086,16 +1082,16 @@ def parse_action(cls, toks): return cls(*args) def __str__(self): - return "{{{}}}".format(", ".join([str(o) for o in self.objects])) + return f"{{{", ".join([str(o) for o in self.objects])}}}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) return min([x._strict_date(lean) for x in self.objects]) class Level2Interval(Level1Interval): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa # Check whether incoming lower/upper values are single-item lists, and # if so take just the first item. 
This works around what I *think* is a # bug in the grammar that provides us with single-item lists of @@ -1122,7 +1118,7 @@ class Level2Season(Season): class ExponentialYear(LongYear): - def __init__(self, base, exponent, significant_digits=None): + def __init__(self, base, exponent, significant_digits=None): # noqa self.base = base self.exponent = exponent self.significant_digits = ( @@ -1132,13 +1128,13 @@ def __init__(self, base, exponent, significant_digits=None): def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) - def get_year(self): + def get_year(self) -> str: if self.significant_digits: return f"{self.base}E{self.exponent}S{self.significant_digits}" else: return f"{self.base}E{self.exponent}" - year = property(get_year) + year = property(get_year) # noqa def estimated(self): return self._precise_year() From add79bd311c2af7698043deff7f992535cb22aed Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 12 Sep 2024 15:45:39 +0200 Subject: [PATCH 087/135] Fixed: Remove SHORT_YEAR_RE This wasn't actually used anywhere! Also removed a redundant regex group --- edtf/natlang/en.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index d57bb82..9cee578 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -15,7 +15,6 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = re.compile(r"(-?)([\du])([\dxu])([\dxu])([\dxu])") LONG_YEAR_RE = re.compile(r"y(-?)([1-9]\d\d\d\d+)") CENTURY_RE = re.compile(r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?") CENTURY_RANGE = re.compile(r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]") @@ -27,7 +26,7 @@ BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b") AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b") APPROX_CHECK = re.compile( - r"\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)" + r"\b(?:ca?\.? 
?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|^~" ) UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)") UNCERTAIN_REPL = re.compile(r"(\d{4})\?") From fee0b648e2344169aeee2b35068c670afc7325a7 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 12 Sep 2024 15:50:51 +0200 Subject: [PATCH 088/135] Problem with f-string --- edtf/parser/parser_classes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index eb9fac5..0334738 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1049,7 +1049,8 @@ def parse_action(cls, toks): return cls(*args) def __str__(self): - return f"[{", ".join([str(o) for o in self.objects])}]" + repr: str = ", ".join([str(o) for o in self.objects]) + return f"[{repr}]" def _strict_date(self, lean: str = EARLIEST): strict_dates = [x._strict_date(lean) for x in self.objects] From 89f36924adf59d271aadc3df6ac3ea1454ccb093 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 12 Sep 2024 15:59:23 +0200 Subject: [PATCH 089/135] Another f-string fix --- edtf/parser/parser_classes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 0334738..14728f0 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1083,7 +1083,8 @@ def parse_action(cls, toks): return cls(*args) def __str__(self): - return f"{{{", ".join([str(o) for o in self.objects])}}}" + repr: str = ", ".join([str(o) for o in self.objects]) + return f"{{{repr}}}" def _strict_date(self, lean: str = EARLIEST): if lean == LATEST: From 9da1d94436e124a337fd81133cee5ac48b85cea5 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 12 Sep 2024 16:29:46 +0200 Subject: [PATCH 090/135] Fixed: pyproject errors --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 
2d050c2..8826b99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,8 @@ [project] name = "edtf" version = "5.0.0" +license = { file = "LICENSE" } +keywords = ['edtf'] dependencies = [ "python-dateutil", "pyparsing", From fe28d3410340fcd5f4fb6c264abe0721c04ec4d8 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Fri, 11 Oct 2024 10:56:33 +0200 Subject: [PATCH 091/135] add Python 3.13 to ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4645d13..1b6cd83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . From 61179bb516522f53e85ba91523192ee10dbc9018 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Fri, 11 Oct 2024 11:36:48 +0200 Subject: [PATCH 092/135] add support for Python 3.13 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b48c3f7..50434a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] [project.optional-dependencies] From 95b83aab6218d5f4b1ef445f7c10970789b683c8 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 16 Jan 2025 17:17:39 +0100 Subject: [PATCH 093/135] Testing without lru_cache --- edtf/natlang/en.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 9cee578..6e77e5b 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,6 +1,5 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" -import functools import re from 
datetime import datetime from typing import Optional @@ -52,7 +51,7 @@ REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -@functools.lru_cache +# @functools.lru_cache def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. @@ -133,7 +132,7 @@ def text_to_edtf(text: str) -> Optional[str]: return result -@functools.lru_cache +# @functools.lru_cache def text_to_edtf_date(text: str) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. From 6262a38aab5ef18f9f109455c2534087fef943b6 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 16 Jan 2025 17:28:17 +0100 Subject: [PATCH 094/135] Fixed: New ruff rules --- edtf/natlang/en.py | 4 ++-- edtf/parser/grammar.py | 18 +++++++++--------- edtf/parser/parser_classes.py | 6 +----- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 6e77e5b..62ccca5 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -171,7 +171,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dXX" % (int(is_century[0][0]) - 1,) + result = f"{int(is_century[0][0]) - 1:02d}XX" is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t) is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t) @@ -182,7 +182,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: pass elif is_ce: - result = "%04d" % (int(is_ce[0][0])) + result = f"{int(is_ce[0][0]):04d}" is_approximate = is_approximate or re.findall(APPROX_CE_RE, t) is_uncertain = is_uncertain or re.findall(UNCERTAIN_CE_RE, t) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index beabf52..b11a3c8 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -49,15 +49,15 @@ Unspecified, ) -oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) -oneThru13 = oneOf(["%.2d" % i for 
i in range(1, 14)]) -oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) -zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) -oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) -oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) -oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) -oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) -zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) +oneThru12 = oneOf([f"{i:.2d}" for i in range(1, 13)]) +oneThru13 = oneOf([f"{i:.2d}" for i in range(1, 14)]) +oneThru23 = oneOf([f"{i:.2d}" for i in range(1, 24)]) +zeroThru23 = oneOf([f"{i:.2d}" for i in range(0, 24)]) +oneThru29 = oneOf([f"{i:.2d}" for i in range(1, 30)]) +oneThru30 = oneOf([f"{i:.2d}" for i in range(1, 31)]) +oneThru31 = oneOf([f"{i:.2d}" for i in range(1, 32)]) +oneThru59 = oneOf([f"{i:.2d}" for i in range(1, 60)]) +zeroThru59 = oneOf([f"{i:.2d}" for i in range(0, 60)]) digit = Word(nums, exact=1) positiveDigit = Word(nums, exact=1, excludeChars="0") diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 14728f0..9439a80 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -304,11 +304,7 @@ def __str__(self): return r def isoformat(self, default=date.max): - return "%s-%02d-%02d" % ( - self._year, - int(self._month or default.month), - int(self._day or default.day), - ) + return f"{self._year}-{int(self._month or default.month):02d}-{int(self._day or default.day):02d}" def lower_fuzzy(self): if not hasattr(self, "significant_digits") or not self.significant_digits: From 8fbce49ce8105cab5ae52f8e0d8fa4f94b042a49 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 16 Jan 2025 17:34:42 +0100 Subject: [PATCH 095/135] Fixed formatting --- edtf/natlang/tests.py | 6 ++-- edtf/parser/tests.py | 66 +++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index d2c43a5..e0acaad 100644 --- 
a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -182,9 +182,9 @@ def test_natlang(input_text, expected_output): Verify that the conversion from text to EDTF format matches the expected output. """ result = text_to_edtf(input_text) - assert ( - result == expected_output - ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" + assert result == expected_output, ( + f"Failed for input: {input_text} - expected {expected_output}, got {result}" + ) @pytest.mark.benchmark diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c2dd711..f37c806 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -312,51 +312,51 @@ def test_edtf_examples(test_input, expected_tuple): # Unpack expected results based on their count if len(expected_tuple) == 1: - assert ( - result_date == expected_tuple[0] - ), f"Expected {expected_tuple[0]}, got {result_date}" + assert result_date == expected_tuple[0], ( + f"Expected {expected_tuple[0]}, got {result_date}" + ) elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert ( - result.lower_strict() == lower_strict - ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert ( - result.upper_strict() == upper_strict - ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert result.lower_strict() == lower_strict, ( + f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + ) + assert result.upper_strict() == upper_strict, ( + f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + ) elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert ( - result.lower_strict() == strict_date - ), f"Lower strict date does not match. 
Expected {strict_date}, got {result.lower_strict()}" - assert ( - result.upper_strict() == strict_date - ), f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" - assert ( - result.lower_fuzzy() == lower_fuzzy - ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert ( - result.upper_fuzzy() == upper_fuzzy - ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert result.lower_strict() == strict_date, ( + f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" + ) + assert result.upper_strict() == strict_date, ( + f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + ) + assert result.lower_fuzzy() == lower_fuzzy, ( + f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + ) + assert result.upper_fuzzy() == upper_fuzzy, ( + f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + ) elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert ( - result.lower_strict() == lower_strict - ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert ( - result.upper_strict() == upper_strict - ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" - assert ( - result.lower_fuzzy() == lower_fuzzy - ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert ( - result.upper_fuzzy() == upper_fuzzy - ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert result.lower_strict() == lower_strict, ( + f"Lower strict date does not match. 
Expected {lower_strict}, got {result.lower_strict()}" + ) + assert result.upper_strict() == upper_strict, ( + f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + ) + assert result.lower_fuzzy() == lower_fuzzy, ( + f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + ) + assert result.upper_fuzzy() == upper_fuzzy, ( + f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + ) @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) From adc1805886df4ad57f4ef0b7c1a441948c889c08 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Thu, 16 Jan 2025 17:53:27 +0100 Subject: [PATCH 096/135] Bad formatting conversion --- edtf/parser/grammar.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index b11a3c8..2fdb4bf 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -49,15 +49,15 @@ Unspecified, ) -oneThru12 = oneOf([f"{i:.2d}" for i in range(1, 13)]) -oneThru13 = oneOf([f"{i:.2d}" for i in range(1, 14)]) -oneThru23 = oneOf([f"{i:.2d}" for i in range(1, 24)]) -zeroThru23 = oneOf([f"{i:.2d}" for i in range(0, 24)]) -oneThru29 = oneOf([f"{i:.2d}" for i in range(1, 30)]) -oneThru30 = oneOf([f"{i:.2d}" for i in range(1, 31)]) -oneThru31 = oneOf([f"{i:.2d}" for i in range(1, 32)]) -oneThru59 = oneOf([f"{i:.2d}" for i in range(1, 60)]) -zeroThru59 = oneOf([f"{i:.2d}" for i in range(0, 60)]) +oneThru12 = oneOf([f"{i:02}" for i in range(1, 13)]) +oneThru13 = oneOf([f"{i:02}" for i in range(1, 14)]) +oneThru23 = oneOf([f"{i:02}" for i in range(1, 24)]) +zeroThru23 = oneOf([f"{i:02}" for i in range(0, 24)]) +oneThru29 = oneOf([f"{i:02}" for i in range(1, 30)]) +oneThru30 = oneOf([f"{i:02}" for i in range(1, 31)]) +oneThru31 = oneOf([f"{i:02}" for i in range(1, 32)]) +oneThru59 = oneOf([f"{i:02}" for i in range(1, 60)]) +zeroThru59 = oneOf([f"{i:02}" for i in range(0, 60)]) digit = 
Word(nums, exact=1) positiveDigit = Word(nums, exact=1, excludeChars="0") From ffbe2d4e31635565155443b4f902f70eeb153dda Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Fri, 17 Jan 2025 11:06:43 +0100 Subject: [PATCH 097/135] Replace range len with enumerate --- edtf/natlang/en.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 62ccca5..82fefc8 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -230,7 +230,7 @@ def text_to_edtf_date(text: str) -> Optional[str]: mentions_month = re.findall(MENTIONS_MONTH, t) mentions_day = re.findall(MENTIONS_DAY, t) - for i in range(len(date1)): + for i, char in enumerate(date1): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. @@ -241,10 +241,10 @@ def text_to_edtf_date(text: str) -> Optional[str]: result += "X" # year precision else: result += "X" # decade precision - elif date1[i] == date2[i]: + elif char == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default - result += date1[i] + result += char else: # different values were produced, meaning that it's likely # a default. 
Use 'unspecified' From f7aeddb59f38d4169c8e31a7432baccb03fcaad4 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 21 Jan 2025 10:02:55 +0100 Subject: [PATCH 098/135] reinstate lru cache --- edtf/natlang/en.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 82fefc8..f287f42 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,5 +1,6 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" +import functools import re from datetime import datetime from typing import Optional @@ -51,7 +52,7 @@ REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -# @functools.lru_cache +@functools.lru_cache def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. @@ -132,7 +133,7 @@ def text_to_edtf(text: str) -> Optional[str]: return result -# @functools.lru_cache +@functools.lru_cache def text_to_edtf_date(text: str) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. From f00f7a9121bb05b5167a2171c90af100a265d7fa Mon Sep 17 00:00:00 2001 From: Peter Eichman Date: Sun, 4 May 2025 18:48:22 -0400 Subject: [PATCH 099/135] Added to the approximate/uncertain examples Checking whether the correct flags are set when the date has a UA marker to the left of the year. 
--- edtf/parser/tests.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c2dd711..85fae22 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -257,6 +257,18 @@ ("2011-~06-~04", (False, True, False)), ("2004-06-~01/2004-06-~20", (False, True, False)), ("156X~", (False, True, False)), + ("?1945/1959", (True, False, False)), + ("?1945", (True, False, False)), + ("?1945-01", (True, False, False)), + ("?1945-01-01", (True, False, False)), + ("~1945/1959", (False, True, False)), + ("~1945", (False, True, False)), + ("~1945-01", (False, True, False)), + ("~1945-01-01", (False, True, False)), + ("%1945/1959", (False, False, True)), + ("%1945", (False, False, True)), + ("%1945-01", (False, False, True)), + ("%1945-01-01", (False, False, True)), ) BAD_EXAMPLES = ( From 68f0b36deee03a355e6bec9f255d718f0d9f032b Mon Sep 17 00:00:00 2001 From: Peter Eichman Date: Sun, 4 May 2025 19:01:32 -0400 Subject: [PATCH 100/135] Handle UA markers to the left of the year segment - include `year_ua_b` in the set of attributes to check when determining whether a date is uncertain, approximate, or both - if there is no month, omit it from the stringification; this corrects a bug where `~1945` would turn into `~1945-None` after parsing and stringification --- edtf/parser/parser_classes.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index ed03355..1326430 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -858,6 +858,7 @@ def __init__( uas = [ year_ua, + year_ua_b, month_ua, day_ua, year_month_ua, @@ -886,7 +887,10 @@ def __str__(self): else: y = f"{self.year_ua_b}{self.year}" if self.year_ua_b else str(self.year) - m = f"{self.month_ua}{self.month}" if self.month_ua else str(self.month) + if self.month: + m = f"{self.month_ua}{self.month}" if self.month_ua else str(self.month) + else: + m = 
None if self.day: d = f"{self.day_ua}{self.day}" if self.day_ua else str(self.day) @@ -902,7 +906,12 @@ def __str__(self): else: result = f"{y}-({m}-{d}){self.month_day_ua}" else: - result = f"{y}-{m}-{d}" if d else f"{y}-{m}" + if d: + result = f"{y}-{m}-{d}" + elif m: + result = f"{y}-{m}" + else: + result = y if self.all_ua: result = f"({result}){self.all_ua}" From 4885de55fc157e3a9118900e74b5733fef36e185 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 26 May 2025 13:41:33 +0200 Subject: [PATCH 101/135] Updates to typing etc. --- README.md | 2 +- edtf/convert.py | 2 +- edtf/jdutil.py | 4 +- edtf/natlang/en.py | 11 +- edtf/parser/grammar.py | 5 +- edtf/parser/parser_classes.py | 276 +++++++++++++++++----------------- edtf/py.typed | 0 pyproject.toml | 4 +- 8 files changed, 150 insertions(+), 154 deletions(-) create mode 100644 edtf/py.typed diff --git a/README.md b/README.md index 2c1f34c..c28d450 100644 --- a/README.md +++ b/README.md @@ -516,6 +516,6 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut * Fix formatting: `ruff format --config pyproject.toml` * Linting and formatting checks and attempted fixes are also run as precommit hooks if you installed them. -### Coverage and benchmraks +### Coverage and benchmarks Coverage reports are generated and added as comments to commits, and also visible in the actions log. Benchmarks are run on pull requests and are published [here]( https://ixc.github.io/python-edtf/dev/bench/) and also visible in the actions log. 
diff --git a/edtf/convert.py b/edtf/convert.py index db86155..c03e2ea 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -21,7 +21,7 @@ def old_specs_to_new_specs_expression(expression): return expression -def dt_to_struct_time(dt): +def dt_to_struct_time(dt) -> struct_time: """ Convert a `datetime.date` or `datetime.datetime` to a `struct_time` representation *with zero values* for data fields that we cannot always diff --git a/edtf/jdutil.py b/edtf/jdutil.py index 7c0a3bd..b7a2cbb 100644 --- a/edtf/jdutil.py +++ b/edtf/jdutil.py @@ -396,7 +396,7 @@ def __sub__(self, other): return jd_to_datetime(combined) - elif isinstance(other, (datetime, dt.datetime)): + elif isinstance(other, datetime | dt.datetime): diff = datetime_to_jd(self) - datetime_to_jd(other) return dt.timedelta(diff) @@ -407,7 +407,7 @@ def __sub__(self, other): raise TypeError(s) def __rsub__(self, other): - if not isinstance(other, (datetime, dt.datetime)): + if not isinstance(other, datetime | dt.datetime): s = "jdutil.datetime supports '-' with: " s += "jdutil.datetime and datetime.datetime" raise TypeError(s) diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index f287f42..077ae19 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -3,7 +3,6 @@ import functools import re from datetime import datetime -from typing import Optional from dateutil.parser import ParserError, parse @@ -53,7 +52,7 @@ @functools.lru_cache -def text_to_edtf(text: str) -> Optional[str]: +def text_to_edtf(text: str) -> str | None: """ Generate EDTF string equivalent of a given natural language date string. """ @@ -63,7 +62,7 @@ def text_to_edtf(text: str) -> Optional[str]: t = text.lower() # try parsing the whole thing - result: Optional[str] = text_to_edtf_date(t) + result: str | None = text_to_edtf_date(t) if not result: # split by list delims and move fwd with the first thing that returns a non-empty string. 
@@ -134,7 +133,7 @@ def text_to_edtf(text: str) -> Optional[str]: @functools.lru_cache -def text_to_edtf_date(text: str) -> Optional[str]: +def text_to_edtf_date(text: str) -> str | None: """ Return EDTF string equivalent of a given natural language date string. @@ -215,9 +214,9 @@ def text_to_edtf_date(text: str) -> Optional[str]: ) except ParserError: - return + return None except Exception: - return + return None if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 2fdb4bf..db6e93e 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -343,15 +343,16 @@ def f(toks): ) -def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): +def parse_edtf(input_string, parse_all=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING if not input_string: raise EDTFParseException(input_string) try: - p = edtfParser.parseString(input_string.strip(), parseAll) + p = edtfParser.parseString(input_string.strip(), parse_all) if p: return p[0] + return None except ParseException as err: if fail_silently: return None diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 9439a80..f4168b9 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1,6 +1,7 @@ import calendar import math import re +from collections.abc import Callable from datetime import date, datetime from operator import add, sub from time import struct_time @@ -50,7 +51,7 @@ def days_in_month(year: int, month: int) -> int: }[month] -def apply_delta(op, time_struct, delta): +def apply_delta(op: Callable, time_struct: struct_time, delta) -> struct_time: """ Apply a `relativedelta` to a `struct_time` data structure. 
@@ -76,9 +77,9 @@ def apply_delta(op, time_struct, delta): # Adjust the year to be close to the 2000 millenium in 1,000 year # increments to try and retain accurate relative leap years - actual_year = time_struct.tm_year - millenium = int(float(actual_year) / 1000) - millenium_diff = (2 - millenium) * 1000 + actual_year: int = time_struct.tm_year + millenium: int = int(float(actual_year) / 1000) + millenium_diff: int = (2 - millenium) * 1000 adjusted_year = actual_year + millenium_diff # Apply delta to the date/time with adjusted year dt = datetime(*(adjusted_year,) + time_struct[1:6]) @@ -97,6 +98,9 @@ class EDTFObject: """ parser = None + _is_approximate: bool + _is_uncertain: bool + _uncertain_and_approximate: bool @classmethod def set_parser(cls, p): @@ -116,26 +120,26 @@ def parse_action(cls, toks): def parse(cls, s): return cls.parser.parseString(s)[0] - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}: '{str(self)}'" def __init__(self, *args, **kwargs): - str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" - raise NotImplementedError(f"{str} is not implemented.") + message: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{message} is not implemented.") - def __str__(self): + def __str__(self) -> str: raise NotImplementedError def _strict_date(self, lean: str = EARLIEST): raise NotImplementedError - def lower_strict(self): + def lower_strict(self) -> struct_time: return self._strict_date(lean=EARLIEST) - def upper_strict(self): + def upper_strict(self) -> struct_time: return self._strict_date(lean=LATEST) - def _get_fuzzy_padding(self, lean: str): + def _get_fuzzy_padding(self, lean: str) -> relativedelta: """ Subclasses should override this to pad based on how precise they are. 
""" @@ -168,15 +172,15 @@ def set_is_uncertain_and_approximate(self, val: bool) -> None: set_is_uncertain_and_approximate, # noqa ) - def lower_fuzzy(self): + def lower_fuzzy(self) -> struct_time: strict_val = self.lower_strict() return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - def upper_fuzzy(self): + def upper_fuzzy(self) -> struct_time: strict_val = self.upper_strict() return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, EDTFObject): return str(self) == str(other) elif isinstance(other, date): @@ -185,7 +189,7 @@ def __eq__(self, other): return self._strict_date() == trim_struct_time(other) return False - def __ne__(self, other): + def __ne__(self, other) -> bool: if isinstance(other, EDTFObject): return str(self) != str(other) elif isinstance(other, date): @@ -194,7 +198,7 @@ def __ne__(self, other): return self._strict_date() != trim_struct_time(other) return True - def __gt__(self, other): + def __gt__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() > other.lower_strict() elif isinstance(other, date): @@ -205,7 +209,7 @@ def __gt__(self, other): f"can't compare {type(self).__name__} with {type(other).__name__}" ) - def __ge__(self, other): + def __ge__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() >= other.lower_strict() elif isinstance(other, date): @@ -216,7 +220,7 @@ def __ge__(self, other): f"can't compare {type(self).__name__} with {type(other).__name__}" ) - def __lt__(self, other): + def __lt__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() < other.lower_strict() elif isinstance(other, date): @@ -227,7 +231,7 @@ def __lt__(self, other): f"can't compare {type(self).__name__} with {type(other).__name__}" ) - def __le__(self, other): + def __le__(self, other) -> bool: if isinstance(other, EDTFObject): return self.lower_strict() 
<= other.lower_strict() elif isinstance(other, date): @@ -245,9 +249,9 @@ def __le__(self, other): class Date(EDTFObject): def __init__( # noqa self, - year: Optional[str] = None, - month: Optional[str] = None, - day: Optional[str] = None, + year: str | None = None, + month: str | None = None, + day: str | None = None, significant_digits=None, **kwargs, ): @@ -256,10 +260,12 @@ def __init__( # noqa self.__init__(**kwargs[param]) return - self._year = year # Year is required, but sometimes passed in as a 'date' dict. - self._month = month - self._day = day - self.significant_digits = ( + self._year: str | None = ( + year # Year is required, but sometimes passed in as a 'date' dict. + ) + self._month: str | None = month + self._day: str | None = day + self.significant_digits: int | None = ( int(significant_digits) if significant_digits else None ) @@ -268,32 +274,32 @@ def set_year(self, y: str): raise AttributeError("Year must not be None") self._year = y - def get_year(self) -> str: + def get_year(self) -> str | None: return self._year year = property(get_year, set_year) # noqa - def set_month(self, m: Optional[str]): + def set_month(self, m: str | None): self._month = m if m is None: self._day = None - def get_month(self) -> Optional[str]: + def get_month(self) -> str | None: return self._month month = property(get_month, set_month) # noqa - def set_day(self, d: Optional[str]): + def set_day(self, d: str | None): self._day = d if d is None: self._day = None - def get_day(self) -> Optional[str]: + def get_day(self) -> str | None: return self._day day = property(get_day, set_day) # noqa - def __str__(self): + def __str__(self) -> str: r = self._year if self._month: r += f"-{self._month}" @@ -303,47 +309,42 @@ def __str__(self): r += f"S{self.significant_digits}" return r - def isoformat(self, default=date.max): + def isoformat(self, default=date.max) -> str: return f"{self._year}-{int(self._month or default.month):02d}-{int(self._day or default.day):02d}" - def 
lower_fuzzy(self): + def lower_fuzzy(self) -> struct_time: if not hasattr(self, "significant_digits") or not self.significant_digits: return apply_delta( sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) ) - else: - total_digits = len(self._year) - insignificant_digits = total_digits - self.significant_digits - lower_year = ( - int(self._year) - // (10**insignificant_digits) - * (10**insignificant_digits) - ) - return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - def upper_fuzzy(self): + total_digits = len(self._year) + insignificant_digits = total_digits - self.significant_digits + lower_year = ( + int(self._year) // (10**insignificant_digits) * (10**insignificant_digits) + ) + return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def upper_fuzzy(self) -> struct_time: if not hasattr(self, "significant_digits") or not self.significant_digits: return apply_delta( add, self.upper_strict(), self._get_fuzzy_padding(LATEST) ) - else: - total_digits = len(self._year) - insignificant_digits = total_digits - self.significant_digits - upper_year = (int(self._year) // (10**insignificant_digits) + 1) * ( - 10**insignificant_digits - ) - 1 - return struct_time( - [upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS - ) - def _precise_year(self, lean): + total_digits: int = len(self._year) + insignificant_digits: int = total_digits - self.significant_digits + upper_year: int = (int(self._year) // (10**insignificant_digits) + 1) * ( + 10**insignificant_digits + ) - 1 + return struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def _precise_year(self, lean: str) -> int: # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: return int(re.sub(r"X", r"0", self._year)) - else: - return int(re.sub(r"X", r"9", self._year)) + return int(re.sub(r"X", r"9", self._year)) - def _precise_month(self, lean): + def _precise_month(self, lean: str) -> int: if self._month 
and self._month != "XX": try: return int(self._month) @@ -351,10 +352,9 @@ def _precise_month(self, lean): raise ValueError( f"Couldn't convert {self._month} to int (in {self})" ) from err - else: - return 1 if lean == EARLIEST else 12 + return 1 if lean == EARLIEST else 12 - def _precise_day(self, lean): + def _precise_day(self, lean: str) -> int: if not self._day or self._day == "XX": if lean == EARLIEST: return 1 @@ -362,10 +362,9 @@ def _precise_day(self, lean): return days_in_month( self._precise_year(LATEST), self._precise_month(LATEST) ) - else: - return int(self._day) + return int(self._day) - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: """ Return a `time.struct_time` representation of the date. """ @@ -380,39 +379,39 @@ def _strict_date(self, lean: str = EARLIEST): ) @property - def precision(self): + def precision(self) -> str: if self._day: return PRECISION_DAY if self._month: return PRECISION_MONTH return PRECISION_YEAR - def estimated(self): + def estimated(self) -> int: return self._precise_year(EARLIEST) class DateAndTime(EDTFObject): - def __init__(self, date, time): # noqa: super raises not implemented - self.date = date + def __init__(self, date: Date, time): # noqa: super raises not implemented + self.date: Date = date self.time = time - def __str__(self): + def __str__(self) -> str: return self.isoformat() - def isoformat(self): + def isoformat(self) -> str: return self.date.isoformat() + "T" + self.time - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: return self.date._strict_date(lean) - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, datetime): return self.isoformat() == other.isoformat() elif isinstance(other, struct_time): return self._strict_date() == trim_struct_time(other) return super().__eq__(other) - def __ne__(self, other): + def __ne__(self, other) -> bool: if 
isinstance(other, datetime): return self.isoformat() != other.isoformat() elif isinstance(other, struct_time): @@ -428,15 +427,13 @@ def __init__(self, lower, upper): # noqa: super() raises not implemented def __str__(self): return f"{self.lower}/{self.upper}" - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> struct_time: if lean == EARLIEST: - r = self.lower._strict_date(lean) - else: - r = self.upper._strict_date(lean) - return r + return self.lower._strict_date(lean) + return self.upper._strict_date(lean) @property - def precision(self): + def precision(self) -> int | None: if self.lower.precision == self.upper.precision: return self.lower.precision return None @@ -456,27 +453,28 @@ def __init__(self, *args): # noqa: super() raises not implemented raise AssertionError("UA must have exactly one argument") ua = args[0] - self.is_uncertain = "?" in ua - self.is_approximate = "~" in ua - self.is_uncertain_and_approximate = "%" in ua + self.is_uncertain: bool = "?" in ua + self.is_approximate: bool = "~" in ua + self.is_uncertain_and_approximate: bool = "%" in ua - def __str__(self): - d = "" + def __str__(self) -> str: + d: list = [] if self.is_uncertain: - d += "?" 
+ d.append("?") if self.is_approximate: - d += "~" + d.append("~") if self.is_uncertain_and_approximate: - d += "%" - return d + d.append("%") + return "".join(d) - def _get_multiplier(self): + def _get_multiplier(self) -> float | None: if self.is_uncertain_and_approximate: return appsettings.MULTIPLIER_IF_BOTH elif self.is_uncertain: return appsettings.MULTIPLIER_IF_UNCERTAIN elif self.is_approximate: return appsettings.MULTIPLIER_IF_APPROXIMATE + return None class UncertainOrApproximate(EDTFObject): @@ -489,13 +487,12 @@ def __init__(self, date, ua): # noqa: super() raises not implemented ua.is_uncertain_and_approximate if ua else False ) - def __str__(self): + def __str__(self) -> str: if self.ua: return f"{self.date}{self.ua}" - else: - return str(self.date) + return str(self.date) - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> tuple: return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -536,10 +533,9 @@ def __init__(self, sectionOpen=False, other_section_element=None): # noqa: supe def __str__(self): if self.is_unknown: return "" - else: - return ".." + return ".." 
- def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> float | None: if lean not in (EARLIEST, LATEST): raise ValueError("lean must be one of EARLIEST or LATEST") @@ -555,6 +551,7 @@ def _strict_date(self, lean: str = EARLIEST): return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) else: return math.inf + return None @property def precision(self): @@ -661,10 +658,10 @@ def lower_strict(self): ) else: return strict_val - else: - return self._strict_date(lean=EARLIEST) - def upper_strict(self): + return self._strict_date(lean=EARLIEST) + + def upper_strict(self) -> struct_time: if self.negative: strict_val = self._strict_date(lean=EARLIEST) if self.precision in ( @@ -689,8 +686,7 @@ def upper_strict(self): ) else: return strict_val - else: - return self._strict_date(lean=LATEST) + return self._strict_date(lean=LATEST) @property def precision(self): @@ -744,15 +740,16 @@ def __init__(self, lower: Optional[dict] = None, upper: Optional[dict] = None): or self.upper.is_uncertain_and_approximate ) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean) -> relativedelta | None: if lean == EARLIEST: return self.lower._get_fuzzy_padding(lean) elif lean == LATEST: return self.upper._get_fuzzy_padding(lean) + return None class LongYear(EDTFObject): - def __init__(self, year: str, significant_digits: Optional[str] = None): # noqa + def __init__(self, year: str, significant_digits: str | None = None): # noqa self.year = year self.significant_digits = ( int(significant_digits) if significant_digits else None @@ -761,18 +758,16 @@ def __init__(self, year: str, significant_digits: Optional[str] = None): # noqa def __str__(self): if self.significant_digits: return f"Y{self.year}S{self.significant_digits}" - else: - return f"Y{self.year}" + return f"Y{self.year}" def _precise_year(self): return int(self.year) - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> 
struct_time: py = self._precise_year() if lean == EARLIEST: return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) def estimated(self): return self._precise_year() @@ -782,18 +777,19 @@ def lower_fuzzy(self): strict_val = self.lower_strict() if not self.significant_digits: return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - else: - insignificant_digits = len(str(full_year)) - int(self.significant_digits) - if insignificant_digits <= 0: - return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - padding_value = 10**insignificant_digits - sig_digits = full_year // padding_value - lower_year = sig_digits * padding_value - return apply_delta( - sub, - struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), - self._get_fuzzy_padding(EARLIEST), - ) + + insignificant_digits = len(str(full_year)) - int(self.significant_digits) + if insignificant_digits <= 0: + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + lower_year = sig_digits * padding_value + return apply_delta( + sub, + struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(EARLIEST), + ) def upper_fuzzy(self): full_year = self._precise_year() @@ -822,7 +818,7 @@ def __init__(self, year, season, **kwargs): # noqa # `Date` methods do their thing. 
self.day = None - def __str__(self): + def __str__(self) -> str: return f"{self.year}-{self.season}" def _precise_month(self, lean): @@ -842,15 +838,15 @@ def __init__( # noqa year=None, month=None, day=None, - year_ua=False, - month_ua=False, - day_ua=False, - year_month_ua=False, - month_day_ua=False, + year_ua: UA | None = None, + month_ua: UA | None = None, + day_ua: UA | None = None, + year_month_ua: UA | None = None, + month_day_ua: UA | None = None, ssn=None, - season_ua=False, - all_ua=False, - year_ua_b=False, + season_ua: UA | None = None, + all_ua: UA | None = None, + year_ua_b: UA | None = None, ): self.year = year self.month = month @@ -890,7 +886,7 @@ def __init__( # noqa if hasattr(item, "is_uncertain_and_approximate") ) - def __str__(self): + def __str__(self) -> str: if self.season_ua: return f"{self.season}{self.season_ua}" @@ -937,12 +933,12 @@ def _precise_month(self, lean: str): return self.season._precise_month(lean) return super()._precise_month(lean) - def _precise_day(self, lean): + def _precise_day(self, lean: str): if self.season: return self.season._precise_day(lean) return super()._precise_day(lean) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean: str): """ This is not a perfect interpretation as fuzziness is introduced for redundant uncertainly modifiers e.g. 
(2006~)~ will get two sets of @@ -1022,7 +1018,7 @@ def __init__(self, lower=None, upper=None): # noqa self.upper = upper def __str__(self): - return "{}..{}".format(self.lower or "", self.upper or "") + return f"{self.lower or ''}..{self.upper or ''}" class EarlierConsecutives(Level1Interval): @@ -1048,7 +1044,7 @@ def __str__(self): repr: str = ", ".join([str(o) for o in self.objects]) return f"[{repr}]" - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> float: strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: @@ -1082,7 +1078,7 @@ def __str__(self): repr: str = ", ".join([str(o) for o in self.objects]) return f"{{{repr}}}" - def _strict_date(self, lean: str = EARLIEST): + def _strict_date(self, lean: str = EARLIEST) -> float: if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) return min([x._strict_date(lean) for x in self.objects]) @@ -1094,15 +1090,16 @@ def __init__(self, lower, upper): # noqa # if so take just the first item. This works around what I *think* is a # bug in the grammar that provides us with single-item lists of # `PartialUncertainOrApproximate` items for lower/upper values. 
- if isinstance(lower, (tuple, list)) and len(lower) == 1: + if isinstance(lower, tuple | list) and len(lower) == 1: self.lower = lower[0] else: self.lower = lower - if isinstance(lower, (tuple, list)) and len(upper) == 1: + if isinstance(lower, tuple | list) and len(upper) == 1: self.upper = upper[0] else: self.upper = upper + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain self.is_uncertain_and_approximate = ( @@ -1123,16 +1120,15 @@ def __init__(self, base, exponent, significant_digits=None): # noqa int(significant_digits) if significant_digits else None ) - def _precise_year(self): + def _precise_year(self) -> int: return int(self.base) * 10 ** int(self.exponent) def get_year(self) -> str: if self.significant_digits: return f"{self.base}E{self.exponent}S{self.significant_digits}" - else: - return f"{self.base}E{self.exponent}" + return f"{self.base}E{self.exponent}" year = property(get_year) # noqa - def estimated(self): + def estimated(self) -> int: return self._precise_year() diff --git a/edtf/py.typed b/edtf/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index 8826b99..2f098bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pyparsing", ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" -requires-python = ">=3.8" +requires-python = ">=3.11" readme = {file = "README.md", content-type = "text/markdown"} authors = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au"}, @@ -111,7 +111,7 @@ exclude_lines = [ [tool.ruff] # Python 3.8 -target-version = "py38" +target-version = "py311" extend-exclude = [ '**/migrations/*', From 98bfe3651f61b572355064b2b9af6a25140bc6c7 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 26 May 2025 13:49:58 +0200 Subject: [PATCH 102/135] Update GH actions --- 
.github/workflows/ci.yml | 2 +- .github/workflows/coverage_readme.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4645d13..8f05398 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.11", "3.12", "3.13"] defaults: run: working-directory: . diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml index 86309de..edb5ac4 100644 --- a/.github/workflows/coverage_readme.yml +++ b/.github/workflows/coverage_readme.yml @@ -22,10 +22,10 @@ jobs: persist-credentials: false fetch-depth: 0 - - name: Set up Python 3.12 + - name: Set up Python 3.13 uses: actions/setup-python@v5 with: - python-version: 3.12 + python-version: 3.13 cache: 'pip' cache-dependency-path: '**/pyproject.toml' From af98f87278a277ad15a3e3fd865924828404924a Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 26 May 2025 14:24:00 +0200 Subject: [PATCH 103/135] New: Add a validator helper function --- edtf/parser/grammar.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index db6e93e..4f5a526 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -359,3 +359,8 @@ def parse_edtf(input_string, parse_all=True, fail_silently=False, debug=None): if debug: raise raise EDTFParseException(input_string, err) from None + + +def validate_edtf(input_string: str) -> bool: + """Returns True if the input string was successfully parsed; False if it isn't.""" + return parse_edtf(input_string, fail_silently=True) is not None From f97b627703cc24b256cae7e8645fa82fdcb53ed1 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 26 May 2025 14:24:57 +0200 Subject: [PATCH 104/135] Add validator to init --- edtf/__init__.py | 2 ++ edtf/parser/__init__.py | 3 ++- 2 files changed, 4 
insertions(+), 1 deletion(-) diff --git a/edtf/__init__.py b/edtf/__init__.py index 7bb2885..2265bc1 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -23,6 +23,7 @@ Unspecified, UnspecifiedIntervalSection, parse_edtf, + validate_edtf, ) from .convert import ( @@ -46,6 +47,7 @@ "trim_struct_time", "text_to_edtf", "parse_edtf", + "validate_edtf", # parser_exceptions "EDTFParseException", # parser_classes diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py index 43197d5..2d2b3b7 100644 --- a/edtf/parser/__init__.py +++ b/edtf/parser/__init__.py @@ -1,5 +1,5 @@ from .edtf_exceptions import EDTFParseException -from .grammar import parse_edtf +from .grammar import parse_edtf, validate_edtf from .parser_classes import ( UA, Consecutives, @@ -26,6 +26,7 @@ __all__ = [ "parse_edtf", + "validate_edtf", "EDTFParseException", "EDTFObject", "Date", From ae82b1191f5e5178627a82b3c26ccded915a3d03 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 26 May 2025 15:41:15 +0200 Subject: [PATCH 105/135] Rename validator --- edtf/__init__.py | 4 ++-- edtf/parser/__init__.py | 4 ++-- edtf/parser/grammar.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/edtf/__init__.py b/edtf/__init__.py index 2265bc1..0b0bfbf 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -22,8 +22,8 @@ UncertainOrApproximate, Unspecified, UnspecifiedIntervalSection, + is_valid_edtf, parse_edtf, - validate_edtf, ) from .convert import ( @@ -47,7 +47,7 @@ "trim_struct_time", "text_to_edtf", "parse_edtf", - "validate_edtf", + "is_valid_edtf", # parser_exceptions "EDTFParseException", # parser_classes diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py index 2d2b3b7..9cbf3c3 100644 --- a/edtf/parser/__init__.py +++ b/edtf/parser/__init__.py @@ -1,5 +1,5 @@ from .edtf_exceptions import EDTFParseException -from .grammar import parse_edtf, validate_edtf +from .grammar import is_valid_edtf, parse_edtf from .parser_classes import ( UA, Consecutives, @@ -26,7 
+26,7 @@ __all__ = [ "parse_edtf", - "validate_edtf", + "is_valid_edtf", "EDTFParseException", "EDTFObject", "Date", diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 4f5a526..7ff3820 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -361,6 +361,6 @@ def parse_edtf(input_string, parse_all=True, fail_silently=False, debug=None): raise EDTFParseException(input_string, err) from None -def validate_edtf(input_string: str) -> bool: +def is_valid_edtf(input_string: str) -> bool: """Returns True if the input string was successfully parsed; False if it isn't.""" return parse_edtf(input_string, fail_silently=True) is not None From 0cbccef9867001b2fcadefb8bb39f88febfaaa2f Mon Sep 17 00:00:00 2001 From: aweakley Date: Tue, 27 May 2025 00:42:57 +0000 Subject: [PATCH 106/135] Update pyproject.toml Temporarily ignore UP031 pending merge of #73 --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 50434a2..e916189 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,4 +145,6 @@ ignore = [ "E501", # Ignore McCabe complexity (for now). "C901", + # Ignore percent format -> format specifier rule for now (pending merge of #73 which resolves them) + "UP031", ] From 3accd56b08c134da9639a358cd1dcb8f006f3f42 Mon Sep 17 00:00:00 2001 From: aweakley Date: Tue, 27 May 2025 00:45:36 +0000 Subject: [PATCH 107/135] Ruff formatting --- edtf/natlang/tests.py | 6 ++-- edtf/parser/tests.py | 66 +++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index d2c43a5..e0acaad 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -182,9 +182,9 @@ def test_natlang(input_text, expected_output): Verify that the conversion from text to EDTF format matches the expected output. 
""" result = text_to_edtf(input_text) - assert ( - result == expected_output - ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" + assert result == expected_output, ( + f"Failed for input: {input_text} - expected {expected_output}, got {result}" + ) @pytest.mark.benchmark diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c2dd711..f37c806 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -312,51 +312,51 @@ def test_edtf_examples(test_input, expected_tuple): # Unpack expected results based on their count if len(expected_tuple) == 1: - assert ( - result_date == expected_tuple[0] - ), f"Expected {expected_tuple[0]}, got {result_date}" + assert result_date == expected_tuple[0], ( + f"Expected {expected_tuple[0]}, got {result_date}" + ) elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert ( - result.lower_strict() == lower_strict - ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert ( - result.upper_strict() == upper_strict - ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert result.lower_strict() == lower_strict, ( + f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + ) + assert result.upper_strict() == upper_strict, ( + f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + ) elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert ( - result.lower_strict() == strict_date - ), f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" - assert ( - result.upper_strict() == strict_date - ), f"Upper strict date does not match. 
Expected {strict_date}, got {result.upper_strict()}" - assert ( - result.lower_fuzzy() == lower_fuzzy - ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert ( - result.upper_fuzzy() == upper_fuzzy - ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert result.lower_strict() == strict_date, ( + f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" + ) + assert result.upper_strict() == strict_date, ( + f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + ) + assert result.lower_fuzzy() == lower_fuzzy, ( + f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + ) + assert result.upper_fuzzy() == upper_fuzzy, ( + f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + ) elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert ( - result.lower_strict() == lower_strict - ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert ( - result.upper_strict() == upper_strict - ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" - assert ( - result.lower_fuzzy() == lower_fuzzy - ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert ( - result.upper_fuzzy() == upper_fuzzy - ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert result.lower_strict() == lower_strict, ( + f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + ) + assert result.upper_strict() == upper_strict, ( + f"Upper strict date does not match. 
Expected {upper_strict}, got {result.upper_strict()}" + ) + assert result.lower_fuzzy() == lower_fuzzy, ( + f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + ) + assert result.upper_fuzzy() == upper_fuzzy, ( + f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + ) @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) From feb6593364590848909981b32978cd54b5cb41d9 Mon Sep 17 00:00:00 2001 From: aweakley Date: Tue, 27 May 2025 00:52:33 +0000 Subject: [PATCH 108/135] Update push step --- .github/workflows/coverage_readme.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml index 86309de..5e697f5 100644 --- a/.github/workflows/coverage_readme.yml +++ b/.github/workflows/coverage_readme.yml @@ -59,10 +59,9 @@ jobs: run: | sed -i '//,//c\\n\${{ steps.coverageComment.outputs.coverageHtml }}\n' ./README.md - - name: Commit & Push changes to README - run: | - git config --global user.name 'github-actions[bot]' - git config --global user.email 'github-actions[bot]@users.noreply.github.com' - git add README.md - git commit -m 'Update coverage badge in README' - git push + - name: Commit & Push changes to Readme + if: ${{ github.ref == 'refs/heads/main' }} + uses: actions-js/push@master + with: + message: Update coverage on Readme + github_token: ${{ secrets.GITHUB_TOKEN }} From 57150c65c10b522cec8278397c80ad316b6eb04d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 27 May 2025 00:53:04 +0000 Subject: [PATCH 109/135] Update coverage on Readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8b2324e..f157bb6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # python-edtf - +\n An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to 
related Python `date` or `struct_time` objects. From 565ab6485caff126ca5fe3d3915c07af3e2fc099 Mon Sep 17 00:00:00 2001 From: aweakley Date: Tue, 27 May 2025 01:09:13 +0000 Subject: [PATCH 110/135] Include pytest-cov --- dev-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dev-requirements.txt b/dev-requirements.txt index 19242af..c27d485 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,6 +2,7 @@ django>=4.2,<5.0 pytest pytest-benchmark +pytest-cov pytest-django ruff pre-commit From 86b154619e94559457e921109539a823913bd52b Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 27 May 2025 10:58:58 +0200 Subject: [PATCH 111/135] Annotate appsettings --- edtf/appsettings.py | 47 +++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index 8e15846..e9b4d9d 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -12,7 +12,7 @@ except ImportError: EDTF = {} -SEASON_MONTHS_RANGE = EDTF.get( +SEASON_MONTHS_RANGE: dict[int, list[int]] = EDTF.get( "SEASON_MONTHS_RANGE", { # season id: [earliest_month, last_month] @@ -27,7 +27,7 @@ }, ) -SEASON_L2_MONTHS_RANGE = EDTF.get( +SEASON_L2_MONTHS_RANGE: dict[int, list[int]] = EDTF.get( "SEASON_L2_MONTHS_RANGE", { # season id: [earliest_month, last_month] @@ -67,9 +67,9 @@ }, ) -DAY_FIRST = EDTF.get("DAY_FIRST", False) # Americans! +DAY_FIRST: bool = EDTF.get("DAY_FIRST", False) # Americans! 
-SEASONS = EDTF.get( +SEASONS: dict[int, str] = EDTF.get( "SEASONS", { 21: "spring", @@ -78,25 +78,38 @@ 24: "winter", }, ) -INVERSE_SEASONS = EDTF.get("INVERSE_SEASONS", {v: k for k, v in SEASONS.items()}) +INVERSE_SEASONS: dict[str, int] = EDTF.get( + "INVERSE_SEASONS", {v: k for k, v in SEASONS.items()} +) # also need to interpret `fall` INVERSE_SEASONS["fall"] = 23 # changing these will break tests -PADDING_DAY_PRECISION = EDTF.get("PADDING_DAY_PRECISION", relativedelta(days=1)) -PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) -PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) -PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) -PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10)) -PADDING_CENTURY_PRECISION = EDTF.get( +PADDING_DAY_PRECISION: relativedelta = EDTF.get( + "PADDING_DAY_PRECISION", relativedelta(days=1) +) +PADDING_MONTH_PRECISION: relativedelta = EDTF.get( + "PADDING_MONTH_PRECISION", relativedelta(months=1) +) +PADDING_YEAR_PRECISION: relativedelta = EDTF.get( + "PADDING_YEAR_PRECISION", relativedelta(years=1) +) +PADDING_SEASON_PRECISION: relativedelta = EDTF.get( + "PADDING_SEASON_PRECISION", relativedelta(weeks=12) +) +PADDING_DECADE_PRECISION: relativedelta = EDTF.get( + "PADDING_DECADE_PRECISION", relativedelta(years=10) +) +PADDING_CENTURY_PRECISION: relativedelta = EDTF.get( "PADDING_CENTURY_PRECISION", relativedelta(years=100) ) -PADDING_MILLENNIUM_PRECISION = EDTF.get( +PADDING_MILLENNIUM_PRECISION: relativedelta = EDTF.get( "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000) ) -MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) -MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) -MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) -DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) +MULTIPLIER_IF_UNCERTAIN: float = 
EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) +MULTIPLIER_IF_APPROXIMATE: float = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) +MULTIPLIER_IF_BOTH: float = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) +DELTA_IF_UNKNOWN: relativedelta = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) +DELTA_IF_EMPTY: relativedelta = relativedelta(None) -DEBUG_PYPARSING = False +DEBUG_PYPARSING: bool = False From a771ec2ff14e8d5e2d40ed8fee0627f8e0aa58df Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 27 May 2025 10:59:20 +0200 Subject: [PATCH 112/135] parseString is an alias to parse_string --- edtf/parser/grammar.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 7ff3820..de84633 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -343,13 +343,20 @@ def f(toks): ) -def parse_edtf(input_string, parse_all=True, fail_silently=False, debug=None): +def parse_edtf( + input_string: str, + parse_all: bool = True, + fail_silently: bool = False, + debug: bool | None = None, +): if debug is None: debug = DEBUG_PYPARSING + if not input_string: raise EDTFParseException(input_string) + try: - p = edtfParser.parseString(input_string.strip(), parse_all) + p = edtfParser.parse_string(input_string.strip(), parse_all) if p: return p[0] return None From 24a5f607095acf81e131fcbaba71cff5fd60043b Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 27 May 2025 11:00:12 +0200 Subject: [PATCH 113/135] More fixes for correctness Also removed the one regex and replaced it with the "replace" method on strings. 
--- edtf/parser/parser_classes.py | 53 ++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index f4168b9..cfb4459 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1,6 +1,5 @@ import calendar import math -import re from collections.abc import Callable from datetime import date, datetime from operator import add, sub @@ -123,7 +122,7 @@ def parse(cls, s): def __repr__(self) -> str: return f"{type(self).__name__}: '{str(self)}'" - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: message: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" raise NotImplementedError(f"{message} is not implemented.") @@ -143,7 +142,7 @@ def _get_fuzzy_padding(self, lean: str) -> relativedelta: """ Subclasses should override this to pad based on how precise they are. """ - return relativedelta(0) + return relativedelta(None) def get_is_approximate(self) -> bool: return getattr(self, "_is_approximate", False) @@ -269,7 +268,7 @@ def __init__( # noqa int(significant_digits) if significant_digits else None ) - def set_year(self, y: str): + def set_year(self, y: str | None): if y is None: raise AttributeError("Year must not be None") self._year = y @@ -300,10 +299,10 @@ def get_day(self) -> str | None: day = property(get_day, set_day) # noqa def __str__(self) -> str: - r = self._year - if self._month: + r = f"{self._year}" + if self._month is not None: r += f"-{self._month}" - if self._day: + if self._day is not None: r += f"-{self._day}" if self.significant_digits: r += f"S{self.significant_digits}" @@ -318,10 +317,11 @@ def lower_fuzzy(self) -> struct_time: sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) ) - total_digits = len(self._year) - insignificant_digits = total_digits - self.significant_digits - lower_year = ( - int(self._year) // (10**insignificant_digits) * (10**insignificant_digits) + 
total_digits: int = len(self._year) if self._year else 0 + i_year: int = int(self._year) if self._year else 0 + insignificant_digits: int = total_digits - self.significant_digits + lower_year: int = ( + i_year // (10**insignificant_digits) * (10**insignificant_digits) ) return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) @@ -331,18 +331,25 @@ def upper_fuzzy(self) -> struct_time: add, self.upper_strict(), self._get_fuzzy_padding(LATEST) ) - total_digits: int = len(self._year) + total_digits: int = len(self._year) if self._year else 0 + i_year: int = int(self._year) if self._year else 0 insignificant_digits: int = total_digits - self.significant_digits - upper_year: int = (int(self._year) // (10**insignificant_digits) + 1) * ( + upper_year: int = (i_year // (10**insignificant_digits) + 1) * ( 10**insignificant_digits ) - 1 return struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) def _precise_year(self, lean: str) -> int: # Replace any ambiguous characters in the year string with 0s or 9s + if not self._year: + return 0 + if lean == EARLIEST: - return int(re.sub(r"X", r"0", self._year)) - return int(re.sub(r"X", r"9", self._year)) + rep = self._year.replace("X", "0") + else: + rep = self._year.replace("X", "9") + + return int(rep) def _precise_month(self, lean: str) -> int: if self._month and self._month != "XX": @@ -448,7 +455,7 @@ def parse_action(cls, toks): args = toks.asList() return cls(*args) - def __init__(self, *args): # noqa: super() raises not implemented + def __init__(self, *args) -> None: # noqa: super() raises not implemented if len(args) != 1: raise AssertionError("UA must have exactly one argument") ua = args[0] @@ -944,7 +951,7 @@ def _get_fuzzy_padding(self, lean: str): redundant uncertainly modifiers e.g. (2006~)~ will get two sets of fuzziness. 
""" - result = relativedelta(0) + result = relativedelta(None) if self.year_ua: result += ( @@ -1040,9 +1047,9 @@ def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __str__(self): - repr: str = ", ".join([str(o) for o in self.objects]) - return f"[{repr}]" + def __str__(self) -> str: + out: str = ", ".join([str(o) for o in self.objects]) + return f"[{out}]" def _strict_date(self, lean: str = EARLIEST) -> float: strict_dates = [x._strict_date(lean) for x in self.objects] @@ -1074,9 +1081,9 @@ def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __str__(self): - repr: str = ", ".join([str(o) for o in self.objects]) - return f"{{{repr}}}" + def __str__(self) -> str: + out: str = ", ".join([str(o) for o in self.objects]) + return f"{{{out}}}" def _strict_date(self, lean: str = EARLIEST) -> float: if lean == LATEST: From df15fd8d4c4b976f103144e9a91ffe69d19609f9 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 2 Jun 2025 11:57:22 +0200 Subject: [PATCH 114/135] Try 3.10 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8f05398..dad48aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . 
From 6a91fa04aecd5ff5e10833edf77c712179bc628f Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 2 Jun 2025 11:57:34 +0200 Subject: [PATCH 115/135] More type annotations --- edtf/parser/parser_classes.py | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index cfb4459..0047aec 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -757,17 +757,17 @@ def _get_fuzzy_padding(self, lean) -> relativedelta | None: class LongYear(EDTFObject): def __init__(self, year: str, significant_digits: str | None = None): # noqa - self.year = year - self.significant_digits = ( + self.year: str = year + self.significant_digits: int | None = ( int(significant_digits) if significant_digits else None ) - def __str__(self): + def __str__(self) -> str: if self.significant_digits: return f"Y{self.year}S{self.significant_digits}" return f"Y{self.year}" - def _precise_year(self): + def _precise_year(self) -> int: return int(self.year) def _strict_date(self, lean: str = EARLIEST) -> struct_time: @@ -776,10 +776,10 @@ def _strict_date(self, lean: str = EARLIEST) -> struct_time: return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - def estimated(self): + def estimated(self) -> int: return self._precise_year() - def lower_fuzzy(self): + def lower_fuzzy(self) -> struct_time: full_year = self._precise_year() strict_val = self.lower_strict() if not self.significant_digits: @@ -798,7 +798,7 @@ def lower_fuzzy(self): self._get_fuzzy_padding(EARLIEST), ) - def upper_fuzzy(self): + def upper_fuzzy(self) -> struct_time: full_year = self._precise_year() strict_val = self.upper_strict() if not self.significant_digits: @@ -828,7 +828,7 @@ def __init__(self, year, season, **kwargs): # noqa def __str__(self) -> str: return f"{self.year}-{self.season}" - def 
_precise_month(self, lean): + def _precise_month(self, lean: str) -> int: rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] if lean == EARLIEST: return rng[0] @@ -881,13 +881,13 @@ def __init__( # noqa season_ua, all_ua, ] - self.is_uncertain = any( + self.is_uncertain: bool = any( item.is_uncertain for item in uas if hasattr(item, "is_uncertain") ) - self.is_approximate = any( + self.is_approximate: bool = any( item.is_approximate for item in uas if hasattr(item, "is_approximate") ) - self.is_uncertain_and_approximate = any( + self.is_uncertain_and_approximate: bool = any( item.is_uncertain_and_approximate for item in uas if hasattr(item, "is_uncertain_and_approximate") @@ -930,22 +930,22 @@ def set_year(self, y): # Year can be None. year = property(Date.get_year, set_year) # noqa - def _precise_year(self, lean: str): + def _precise_year(self, lean: str) -> int: if self.season: return self.season._precise_year(lean) return super()._precise_year(lean) - def _precise_month(self, lean: str): + def _precise_month(self, lean: str) -> int: if self.season: return self.season._precise_month(lean) return super()._precise_month(lean) - def _precise_day(self, lean: str): + def _precise_day(self, lean: str) -> int: if self.season: return self.season._precise_day(lean) return super()._precise_day(lean) - def _get_fuzzy_padding(self, lean: str): + def _get_fuzzy_padding(self, lean: str) -> struct_time: """ This is not a perfect interpretation as fuzziness is introduced for redundant uncertainly modifiers e.g. 
(2006~)~ will get two sets of @@ -1024,17 +1024,17 @@ def __init__(self, lower=None, upper=None): # noqa else: self.upper = upper - def __str__(self): + def __str__(self) -> str: return f"{self.lower or ''}..{self.upper or ''}" class EarlierConsecutives(Level1Interval): - def __str__(self): + def __str__(self) -> str: return f"{self.lower}{self.upper}" class LaterConsecutives(Level1Interval): - def __str__(self): + def __str__(self) -> str: return f"{self.lower}{self.upper}" From d3d0cd59fc2d85e17d8c9c64a8392f68e733e344 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 2 Jun 2025 11:59:06 +0200 Subject: [PATCH 116/135] Update supported python in pyproject --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2f098bb..1980551 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pyparsing", ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" -requires-python = ">=3.11" +requires-python = ">=3.10" readme = {file = "README.md", content-type = "text/markdown"} authors = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au"}, From 9bd142d4e9ebde4cd771e5ae1b9351abc148b705 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 2 Jun 2025 12:09:12 +0200 Subject: [PATCH 117/135] Fixed: UA is a single state, no need for append --- edtf/parser/parser_classes.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 0047aec..aa8144a 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -465,14 +465,13 @@ def __init__(self, *args) -> None: # noqa: super() raises not implemented self.is_uncertain_and_approximate: bool = "%" in ua def __str__(self) -> str: - d: list = [] if self.is_uncertain: - d.append("?") - if self.is_approximate: - d.append("~") - if 
self.is_uncertain_and_approximate: - d.append("%") - return "".join(d) + return "?" + elif self.is_approximate: + return "~" + elif self.is_uncertain_and_approximate: + return "%" + return "" def _get_multiplier(self) -> float | None: if self.is_uncertain_and_approximate: From 517ba18d3c1b41d97a6ba2b5221d84447205e75b Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 2 Jun 2025 12:14:02 +0200 Subject: [PATCH 118/135] Add mypy and pip to test dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1980551..30a1a9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,8 @@ test = [ "coverage", "pytest-cov", "junitparser", + "mypy>=1.15.0", + "pip>=25.1.1", ] [project.urls] From 4a03a68226d9ec8858bfb8ff8044b6f7991daf22 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Wed, 18 Jun 2025 21:52:33 +0200 Subject: [PATCH 119/135] fix Python2 leftofers --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8fe2de1..4b4fb2d 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,8 @@ UncertainOrApproximate: '1979-08~' # normalised string representation (some different EDTF strings have identical meanings) >>> ->>> unicode(e) -u'1979-08~' +>>> str(e) +'1979-08~' # Derive Python date objects @@ -446,7 +446,7 @@ False ### Comparisons -Two EDTF dates are considered equal if their `unicode()` representations are the same. An EDTF date is considered greater than another if its `lower_strict` value is later. +Two EDTF dates are considered equal if their `str()` representations are the same. An EDTF date is considered greater than another if its `lower_strict` value is later. 
## Django ORM field From 6658ea9b28fe3ee498d547219b2a80d18a8097e4 Mon Sep 17 00:00:00 2001 From: rettinghaus Date: Wed, 2 Jul 2025 21:45:11 +0200 Subject: [PATCH 120/135] clarify methods --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b4fb2d..956e2ab 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ Level2Season ExponentialYear ``` -All of these implement `upper/lower_strict/fuzzy()` methods to derive `struct_time` objects, except of UnspecifiedIntervalSection, that can also return math.inf value +All of these implement `upper_strict()/lower_strict()/fuzzy()` methods to derive `struct_time` objects, except of UnspecifiedIntervalSection, that can also return math.inf value The `*Interval` instances have `upper` and `lower` properties that are themselves `EDTFObject` instances. From f60f5a912974ad42dfa709597911379d29d1f654 Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Thu, 7 Aug 2025 12:47:17 +1000 Subject: [PATCH 121/135] edtf.util: new @remapparams decorator to rename obsolete parameters to their modern names --- edtf/util.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 edtf/util.py diff --git a/edtf/util.py b/edtf/util.py new file mode 100644 index 0000000..b1d5673 --- /dev/null +++ b/edtf/util.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +''' +Assorted utility functions. +''' + +from functools import update_wrapper +from logging import warning +from traceback import extract_stack + +def remapparams(**remap): + ''' + Remap the specified named parameters. 
+ + Example to support an obsolete `parseAll` parameter: + + @remapparams(parseAll='parse_all') + def parse(s, parse_all=True): + + ''' + if not remap: + raise ValueError('no parameters specified for remapping') + for old, new in remap.items(): + if new in remap: + raise ValueError(f'{old}={new!r}: {new!r} is also remapped') + + def remapparams_decorator(func): + '''The decorator to apply the remappings.''' + # a record of callers whose parameters were remapped + remapped_callers = set() + + def remapparams_wrapper(*a, **kw): + remappings = {} + for param, value in list(kw.items()): + try: + remapped = remap[param] + except KeyError: + continue + if remapped in kw: + raise ValueError(f'remap {param}= to {remapped}=: this is already present in the keyword arguments') + del kw[param] + kw[remapped] = value + remappings[param] = remapped + if remappings: + caller_frame = extract_stack(limit=2)[-2] + caller_key = caller_frame.filename, caller_frame.lineno + if caller_key not in remapped_callers: + warning( + "call of %s.%s() from %s:%d: remapped the following obsolete parameters: %s", + func.__module__, func.__name__, + caller_frame.filename, caller_frame.lineno, + ", ".join(sorted(f'{old}->{new}' for old, new in remappings.items())), + ) + remapped_callers.add(caller_key) + return func(*a, **kw) + + update_wrapper(remapparams_wrapper, func) + return remapparams_wrapper + + return remapparams_decorator + +if __name__ == '__main__': + + @remapparams(parseAll='parse_all') + def parser(s, parse_all=True): + pass + + assert parser.__name__ == 'parser' + parser('foo') + # this should not warn + parser('foo', parse_all=False) + # this should warn, but only once + for _ in 1, 2: + parser('foo', parseAll=False) + try: + parser('foo', parseAll=False, parse_all=True) + except ValueError: + pass + else: + assert False, "expected ValueError because of duplicated parameters" + + try: + @remapparams() + def no_remappings(): + pass + except ValueError: + pass + else: + assert False, 
"expected ValueError from @remapparams() because no remappings" + try: + @remapparams(p1='p2', p2='p3') + def no_remappings(): + pass + except ValueError: + pass + else: + assert False, "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" From 94ec9c21c329fb2f45ae6003426d843900445503 Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Thu, 7 Aug 2025 12:57:09 +1000 Subject: [PATCH 122/135] edtf.parser.grammar: decorate parse_edtf() to accept the old parseAll parameter --- edtf/parser/grammar.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index de84633..4480555 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -5,6 +5,7 @@ import pyparsing from edtf.appsettings import DEBUG_PYPARSING +from edtf.util import remapparams pyparsing.ParserElement.enablePackrat() @@ -343,6 +344,7 @@ def f(toks): ) +@remapparams(parseAll='parse_all') def parse_edtf( input_string: str, parse_all: bool = True, From d142a7544197d2ceba02e7d337e9df8f9daa7abb Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Thu, 7 Aug 2025 13:39:33 +1000 Subject: [PATCH 123/135] edtf.utils: clean lint --- edtf/util.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/edtf/util.py b/edtf/util.py index b1d5673..11d0c6e 100644 --- a/edtf/util.py +++ b/edtf/util.py @@ -8,6 +8,7 @@ from logging import warning from traceback import extract_stack + def remapparams(**remap): ''' Remap the specified named parameters. 
@@ -65,7 +66,7 @@ def remapparams_wrapper(*a, **kw): def parser(s, parse_all=True): pass - assert parser.__name__ == 'parser' + assert parser.__name__ == 'parser' # noqa: S101 parser('foo') # this should not warn parser('foo', parse_all=False) @@ -77,7 +78,9 @@ def parser(s, parse_all=True): except ValueError: pass else: - assert False, "expected ValueError because of duplicated parameters" + raise AssertionError( + "expected ValueError because of duplicated parameters" + ) try: @remapparams() @@ -86,7 +89,9 @@ def no_remappings(): except ValueError: pass else: - assert False, "expected ValueError from @remapparams() because no remappings" + raise AssertionError( + "expected ValueError from @remapparams() because no remappings" + ) try: @remapparams(p1='p2', p2='p3') def no_remappings(): @@ -94,4 +99,6 @@ def no_remappings(): except ValueError: pass else: - assert False, "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" + raise AssertionError( + "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" + ) From 36fa3462d79d0497960bbd4f425ccb147b9bb4a1 Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Thu, 7 Aug 2025 13:45:40 +1000 Subject: [PATCH 124/135] ruff format --- edtf/parser/grammar.py | 2 +- edtf/util.py | 51 ++++++++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 4480555..cdb64dc 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -344,7 +344,7 @@ def f(toks): ) -@remapparams(parseAll='parse_all') +@remapparams(parseAll="parse_all") def parse_edtf( input_string: str, parse_all: bool = True, diff --git a/edtf/util.py b/edtf/util.py index 11d0c6e..5241ae8 100644 --- a/edtf/util.py +++ b/edtf/util.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -''' +""" Assorted utility functions. 
-''' +""" from functools import update_wrapper from logging import warning @@ -10,7 +10,7 @@ def remapparams(**remap): - ''' + """ Remap the specified named parameters. Example to support an obsolete `parseAll` parameter: @@ -18,15 +18,15 @@ def remapparams(**remap): @remapparams(parseAll='parse_all') def parse(s, parse_all=True): - ''' + """ if not remap: - raise ValueError('no parameters specified for remapping') + raise ValueError("no parameters specified for remapping") for old, new in remap.items(): if new in remap: - raise ValueError(f'{old}={new!r}: {new!r} is also remapped') + raise ValueError(f"{old}={new!r}: {new!r} is also remapped") def remapparams_decorator(func): - '''The decorator to apply the remappings.''' + """The decorator to apply the remappings.""" # a record of callers whose parameters were remapped remapped_callers = set() @@ -38,7 +38,9 @@ def remapparams_wrapper(*a, **kw): except KeyError: continue if remapped in kw: - raise ValueError(f'remap {param}= to {remapped}=: this is already present in the keyword arguments') + raise ValueError( + f"remap {param}= to {remapped}=: this is already present in the keyword arguments" + ) del kw[param] kw[remapped] = value remappings[param] = remapped @@ -48,9 +50,13 @@ def remapparams_wrapper(*a, **kw): if caller_key not in remapped_callers: warning( "call of %s.%s() from %s:%d: remapped the following obsolete parameters: %s", - func.__module__, func.__name__, - caller_frame.filename, caller_frame.lineno, - ", ".join(sorted(f'{old}->{new}' for old, new in remappings.items())), + func.__module__, + func.__name__, + caller_frame.filename, + caller_frame.lineno, + ", ".join( + sorted(f"{old}->{new}" for old, new in remappings.items()) + ), ) remapped_callers.add(caller_key) return func(*a, **kw) @@ -60,29 +66,29 @@ def remapparams_wrapper(*a, **kw): return remapparams_decorator -if __name__ == '__main__': - @remapparams(parseAll='parse_all') +if __name__ == "__main__": + + 
@remapparams(parseAll="parse_all") def parser(s, parse_all=True): pass - assert parser.__name__ == 'parser' # noqa: S101 - parser('foo') + assert parser.__name__ == "parser" # noqa: S101 + parser("foo") # this should not warn - parser('foo', parse_all=False) + parser("foo", parse_all=False) # this should warn, but only once for _ in 1, 2: - parser('foo', parseAll=False) + parser("foo", parseAll=False) try: - parser('foo', parseAll=False, parse_all=True) + parser("foo", parseAll=False, parse_all=True) except ValueError: pass else: - raise AssertionError( - "expected ValueError because of duplicated parameters" - ) + raise AssertionError("expected ValueError because of duplicated parameters") try: + @remapparams() def no_remappings(): pass @@ -93,7 +99,8 @@ def no_remappings(): "expected ValueError from @remapparams() because no remappings" ) try: - @remapparams(p1='p2', p2='p3') + + @remapparams(p1="p2", p2="p3") def no_remappings(): pass except ValueError: From f4a73fe3097b56045b12c54c7f7fe8bb69cce3c3 Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Fri, 8 Aug 2025 09:47:34 +1000 Subject: [PATCH 125/135] edit.utils.remapparams: move the tests from the main section into edtf.tests --- edtf/tests.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ edtf/util.py | 44 -------------------------------------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/edtf/tests.py b/edtf/tests.py index 9812b65..7ee9805 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -4,6 +4,7 @@ from time import struct_time from edtf import convert +from edtf.utils import remapparams def test_dt_to_struct_time_for_datetime(): @@ -107,3 +108,46 @@ def test_roll_negative_time_fields(): assert convert._roll_negative_time_fields( year, month, day, hour, minute, second ) == (-102, 5, 24, 21, 41, 47) + +def test_remapparams(): + + @remapparams(parseAll="parse_all") + def parser(s, parse_all=True): + pass + + assert parser.__name__ == "parser" # noqa: S101 + parser("foo") + # 
this should not warn + parser("foo", parse_all=False) + # this should warn, but only once + for _ in 1, 2: + parser("foo", parseAll=False) + try: + parser("foo", parseAll=False, parse_all=True) + except ValueError: + pass + else: + raise AssertionError("expected ValueError because of duplicated parameters") + + try: + + @remapparams() + def no_remappings(): + pass + except ValueError: + pass + else: + raise AssertionError( + "expected ValueError from @remapparams() because no remappings" + ) + try: + + @remapparams(p1="p2", p2="p3") + def no_remappings(): + pass + except ValueError: + pass + else: + raise AssertionError( + "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" + ) diff --git a/edtf/util.py b/edtf/util.py index 5241ae8..146eec2 100644 --- a/edtf/util.py +++ b/edtf/util.py @@ -65,47 +65,3 @@ def remapparams_wrapper(*a, **kw): return remapparams_wrapper return remapparams_decorator - - -if __name__ == "__main__": - - @remapparams(parseAll="parse_all") - def parser(s, parse_all=True): - pass - - assert parser.__name__ == "parser" # noqa: S101 - parser("foo") - # this should not warn - parser("foo", parse_all=False) - # this should warn, but only once - for _ in 1, 2: - parser("foo", parseAll=False) - try: - parser("foo", parseAll=False, parse_all=True) - except ValueError: - pass - else: - raise AssertionError("expected ValueError because of duplicated parameters") - - try: - - @remapparams() - def no_remappings(): - pass - except ValueError: - pass - else: - raise AssertionError( - "expected ValueError from @remapparams() because no remappings" - ) - try: - - @remapparams(p1="p2", p2="p3") - def no_remappings(): - pass - except ValueError: - pass - else: - raise AssertionError( - "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" - ) From dedb361b2f39ad8366e5c483a9f28f13146bd69e Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Fri, 8 Aug 2025 10:07:15 +1000 Subject: [PATCH 
126/135] edtf.tests: exercise parse_edtf using the obsolete and modern mode parameters --- edtf/tests.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/edtf/tests.py b/edtf/tests.py index 7ee9805..cc0f31e 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -4,7 +4,9 @@ from time import struct_time from edtf import convert -from edtf.utils import remapparams +from edtf.parser.edtf_exceptions import EDTFParseException +from edtf.parser.grammar import parse_edtf +from edtf.util import remapparams def test_dt_to_struct_time_for_datetime(): @@ -151,3 +153,31 @@ def no_remappings(): raise AssertionError( "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" ) + +def test_remapparams_parse_edtf(): + edtf_s = '2005-09-24T10:00:00' # ISO8601 example from the EDTF spec + dat = parse_edtf(edtf_s) # implicit parse_all=True + assert dat.isoformat() == edtf_s + assert parse_edtf(edtf_s, parse_all=True).isoformat() == edtf_s + assert parse_edtf(edtf_s, parseAll=True).isoformat() == edtf_s + assert parse_edtf(f'{edtf_s} SNORT', parse_all=False).isoformat() == edtf_s + assert parse_edtf(f'{edtf_s} SNORT', parseAll=False).isoformat() == edtf_s + # make sure parse_all=True fails the SNORT parse + try: + parse_edtf(f'{edtf_s} SNORT') + except EDTFParseException: + pass + else: + raise AssertionError('expected EDTFParseException') + try: + parse_edtf(f'{edtf_s} SNORT', parse_all=True) + except EDTFParseException: + pass + else: + raise AssertionError('expected EDTFParseException') + try: + parse_edtf(f'{edtf_s} SNORT', parseAll=True) + except EDTFParseException: + pass + else: + raise AssertionError('expected EDTFParseException') From 83c8ba2d07f1f63bdb666397cb118ee59b2fc3f1 Mon Sep 17 00:00:00 2001 From: Cameron Simpson Date: Fri, 8 Aug 2025 10:07:43 +1000 Subject: [PATCH 127/135] edtf.tests: autoformat --- edtf/tests.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) 
diff --git a/edtf/tests.py b/edtf/tests.py index cc0f31e..837e580 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -111,8 +111,8 @@ def test_roll_negative_time_fields(): year, month, day, hour, minute, second ) == (-102, 5, 24, 21, 41, 47) -def test_remapparams(): +def test_remapparams(): @remapparams(parseAll="parse_all") def parser(s, parse_all=True): pass @@ -154,30 +154,31 @@ def no_remappings(): "expected ValueError from @remapparams() because p1 remaps to another remapped parameter" ) + def test_remapparams_parse_edtf(): - edtf_s = '2005-09-24T10:00:00' # ISO8601 example from the EDTF spec - dat = parse_edtf(edtf_s) # implicit parse_all=True + edtf_s = "2005-09-24T10:00:00" # ISO8601 example from the EDTF spec + dat = parse_edtf(edtf_s) # implicit parse_all=True assert dat.isoformat() == edtf_s assert parse_edtf(edtf_s, parse_all=True).isoformat() == edtf_s assert parse_edtf(edtf_s, parseAll=True).isoformat() == edtf_s - assert parse_edtf(f'{edtf_s} SNORT', parse_all=False).isoformat() == edtf_s - assert parse_edtf(f'{edtf_s} SNORT', parseAll=False).isoformat() == edtf_s + assert parse_edtf(f"{edtf_s} SNORT", parse_all=False).isoformat() == edtf_s + assert parse_edtf(f"{edtf_s} SNORT", parseAll=False).isoformat() == edtf_s # make sure parse_all=True fails the SNORT parse try: - parse_edtf(f'{edtf_s} SNORT') + parse_edtf(f"{edtf_s} SNORT") except EDTFParseException: pass else: - raise AssertionError('expected EDTFParseException') + raise AssertionError("expected EDTFParseException") try: - parse_edtf(f'{edtf_s} SNORT', parse_all=True) + parse_edtf(f"{edtf_s} SNORT", parse_all=True) except EDTFParseException: pass else: - raise AssertionError('expected EDTFParseException') + raise AssertionError("expected EDTFParseException") try: - parse_edtf(f'{edtf_s} SNORT', parseAll=True) + parse_edtf(f"{edtf_s} SNORT", parseAll=True) except EDTFParseException: pass else: - raise AssertionError('expected EDTFParseException') + raise AssertionError("expected 
EDTFParseException") From 5069913aa41e6ccfb4a06e4375e3304d91eb5b5b Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Sat, 4 Oct 2025 23:46:40 +0200 Subject: [PATCH 128/135] chore: update actions --- .github/workflows/ci.yml | 4 ++-- .github/workflows/coverage_readme.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dad48aa..4063e10 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,10 +24,10 @@ jobs: working-directory: . steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml index eb5db29..951c1ec 100644 --- a/.github/workflows/coverage_readme.yml +++ b/.github/workflows/coverage_readme.yml @@ -17,13 +17,13 @@ jobs: update-coverage-on-readme: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: persist-credentials: false fetch-depth: 0 - name: Set up Python 3.13 - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.13 cache: 'pip' From 03c814558f5cfd5798844588c31c7e20d2a2b79c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 5 Oct 2025 02:49:00 +0000 Subject: [PATCH 129/135] Update coverage on Readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 956e2ab..aa72a4c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # python-edtf -\n +Coverage
Coverage Report
FileStmtsMissCoverMissing
edtf
   __init__.py40100% 
   appsettings.py29293%12–13
   convert.py631182%11–19, 21, 72
   fields.py1191190%1, 3–8, 10–13, 15, 23, 29, 31, 33–35, 38–39, 51–58, 60, 63, 65–70, 72–76, 78–79, 81, 83–84, 86, 88–89, 91, 93–95, 97–98, 100, 102–105, 107, 109–112, 114, 123–125, 128, 131–132, 135–136, 139–140, 142–144, 147, 151, 153, 155, 157, 160–173, 179, 181–182, 184–185, 190–191, 193–194, 196, 204, 206, 208–209, 212–213, 223–226, 234
   jdutil.py984455%37, 55, 91–92, 287, 291, 314, 316–317, 319, 321, 346, 348, 350, 370–372, 374, 376, 378, 381–383, 385, 387, 389, 392–393, 395, 397, 399–400, 402, 405–407, 410–413, 415, 417, 424, 431
   tests.py119496%137–138, 148–149
   util.py330100% 
edtf/natlang
   __init__.py20100% 
   en.py1581193%85, 88, 145, 181–182, 192–193, 218–219, 223, 290
   tests.py10190%211
edtf/parser
   __init__.py40100% 
   edtf_exceptions.py15286%24, 29
   grammar.py133496%364, 367, 369, 375
   parser_classes.py66110683%114–116, 123, 126, 188, 194–198, 205–207, 214–218, 227–229, 234–240, 273, 345, 358–359, 390–394, 397, 412, 415–419, 422–426, 444–446, 474, 483, 546, 560, 564, 600, 608, 612, 659–660, 666, 684–685, 688, 694, 700, 702, 706, 713, 754, 779, 785, 789, 804, 808, 898, 908, 919–920, 922, 929, 932, 943, 948, 953, 989, 992, 998, 1003, 1005–1013, 1028, 1033, 1111, 1116, 1149
   tests.py89198%412
TOTAL153730580% 
+ An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects. From 5f042960bd836eb170f5a7b313f51f2fafc4ea8a Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Fri, 10 Oct 2025 11:09:38 +0200 Subject: [PATCH 130/135] feat: add support for Python 3.14 --- .github/workflows/ci.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4063e10..4c5f2de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] defaults: run: working-directory: . diff --git a/pyproject.toml b/pyproject.toml index f2b2213..c496a46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] [project.optional-dependencies] From e2bf40d265b3c717218b18f3b4e32f675b0806e6 Mon Sep 17 00:00:00 2001 From: rettinghaus Date: Fri, 26 Dec 2025 13:46:18 +0100 Subject: [PATCH 131/135] update actions --- .github/workflows/ci.yml | 4 ++-- .github/workflows/coverage_readme.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4c5f2de..6c44397 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: working-directory: . 
steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 @@ -93,7 +93,7 @@ jobs: pytest -m benchmark --benchmark-json=./output.json - name: Download previous benchmark data - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ./cache key: ${{ runner.os }}-benchmark diff --git a/.github/workflows/coverage_readme.yml b/.github/workflows/coverage_readme.yml index 951c1ec..860ace3 100644 --- a/.github/workflows/coverage_readme.yml +++ b/.github/workflows/coverage_readme.yml @@ -17,7 +17,7 @@ jobs: update-coverage-on-readme: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: persist-credentials: false fetch-depth: 0 From d9fdaffd96840a80352e79c1100d0a19657115bd Mon Sep 17 00:00:00 2001 From: rettinghaus Date: Fri, 2 Jan 2026 19:01:08 +0100 Subject: [PATCH 132/135] set minimal version for pyparsing --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f2b2213..594ce37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ license = { file = "LICENSE" } keywords = ['edtf'] dependencies = [ "python-dateutil", - "pyparsing", + "pyparsing>=3.0.0", ] description = "Python implementation of Library of Congress EDTF (Extended Date Time Format) specification" requires-python = ">=3.10" diff --git a/requirements.txt b/requirements.txt index 1656e27..f142bc2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ python-dateutil -pyparsing +pyparsing >= 3.0.0 From a47acf48af7f7f6e1d0ce3473fb6e91c88d62319 Mon Sep 17 00:00:00 2001 From: rettinghaus Date: Fri, 2 Jan 2026 19:01:20 +0100 Subject: [PATCH 133/135] fix warnings --- edtf/parser/grammar.py | 53 +++++++++++++++++------------------ edtf/parser/parser_classes.py | 4 +-- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/edtf/parser/grammar.py 
b/edtf/parser/grammar.py index cdb64dc..146f27b 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -7,8 +7,7 @@ from edtf.appsettings import DEBUG_PYPARSING from edtf.util import remapparams -pyparsing.ParserElement.enablePackrat() - +pyparsing.ParserElement.enable_packrat() from pyparsing import ( Combine, NotAny, @@ -20,7 +19,7 @@ Word, ZeroOrMore, nums, - oneOf, + one_of, ) from pyparsing import Literal as L @@ -50,18 +49,18 @@ Unspecified, ) -oneThru12 = oneOf([f"{i:02}" for i in range(1, 13)]) -oneThru13 = oneOf([f"{i:02}" for i in range(1, 14)]) -oneThru23 = oneOf([f"{i:02}" for i in range(1, 24)]) -zeroThru23 = oneOf([f"{i:02}" for i in range(0, 24)]) -oneThru29 = oneOf([f"{i:02}" for i in range(1, 30)]) -oneThru30 = oneOf([f"{i:02}" for i in range(1, 31)]) -oneThru31 = oneOf([f"{i:02}" for i in range(1, 32)]) -oneThru59 = oneOf([f"{i:02}" for i in range(1, 60)]) -zeroThru59 = oneOf([f"{i:02}" for i in range(0, 60)]) +oneThru12 = one_of([f"{i:02}" for i in range(1, 13)]) +oneThru13 = one_of([f"{i:02}" for i in range(1, 14)]) +oneThru23 = one_of([f"{i:02}" for i in range(1, 24)]) +zeroThru23 = one_of([f"{i:02}" for i in range(0, 24)]) +oneThru29 = one_of([f"{i:02}" for i in range(1, 30)]) +oneThru30 = one_of([f"{i:02}" for i in range(1, 31)]) +oneThru31 = one_of([f"{i:02}" for i in range(1, 32)]) +oneThru59 = one_of([f"{i:02}" for i in range(1, 60)]) +zeroThru59 = one_of([f"{i:02}" for i in range(0, 60)]) digit = Word(nums, exact=1) -positiveDigit = Word(nums, exact=1, excludeChars="0") +positiveDigit = Word(nums, exact=1, exclude_chars="0") positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) second = zeroThru59 @@ -71,8 +70,8 @@ month = oneThru12("month") monthDay = ( - (oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) - ^ (oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) + (one_of("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) + ^ (one_of("04 06 09 11")("month") + "-" + oneThru30("day")) ^ 
(L("02")("month") + "-" + oneThru29("day")) ) @@ -95,15 +94,15 @@ date = Combine(year ^ yearMonth ^ yearMonthDay)("date") Date.set_parser(date) -zoneOffsetHour = oneThru13 -zoneOffset = L("Z") ^ ( +zone_offsetHour = oneThru13 +zone_offset = L("Z") ^ ( Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) + + (zone_offsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) ) baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") -time = Combine(baseTime + Optional(zoneOffset))("time") +time = Combine(baseTime + Optional(zone_offset))("time") dateAndTime = date + "T" + time DateAndTime.set_parser(dateAndTime) @@ -117,10 +116,10 @@ # (* ************************** Level 1 *************************** *) # (* ** Auxiliary Assignments for Level 1 ** *) -UASymbol = Combine(oneOf("? ~ %")) +UASymbol = Combine(one_of("? ~ %")) UA.set_parser(UASymbol) -seasonNumber = oneOf("21 22 23 24") +seasonNumber = one_of("21 22 23 24") # (* *** Season (unqualified) *** *) season = year + "-" + seasonNumber("season") @@ -153,9 +152,9 @@ def f(toks): l1Start = ".." ^ uaDateOrSeason -l1Start.addParseAction(f) +l1Start.add_parse_action(f) l1End = uaDateOrSeason ^ ".." 
-l1End.addParseAction(f) +l1End.add_parse_action(f) level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( "lower" @@ -197,7 +196,7 @@ def f(toks): dayWithX = Combine(("X" + digitOrX) ^ (digitOrX + "X"))("day") # 2-digit month with at least one 'X' present -monthWithX = Combine(oneOf("0X 1X") ^ ("X" + digitOrX))("month") +monthWithX = Combine(one_of("0X 1X") ^ ("X" + digitOrX))("month") # 4-digit year with at least one 'X' present yearWithX = Combine( @@ -301,8 +300,8 @@ def f(toks): ^ consecutives ) -earlier = L("..").addParseAction(f)("lower") + date("upper").addParseAction(f) -later = date("lower").addParseAction(f) + L("..").addParseAction(f)("upper") +earlier = L("..").add_parse_action(f)("lower") + date("upper").add_parse_action(f) +later = date("lower").add_parse_action(f) + L("..").add_parse_action(f)("upper") EarlierConsecutives.set_parser(earlier) LaterConsecutives.set_parser(later) @@ -323,7 +322,7 @@ def f(toks): # (* *** L2 Season *** *) -seasonL2Number = oneOf("21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41") +seasonL2Number = one_of("21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41") l2season = year + "-" + seasonL2Number("season") Level2Season.set_parser(l2season) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index f5d2581..67dd8ee 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -104,7 +104,7 @@ class EDTFObject: @classmethod def set_parser(cls, p): cls.parser = p - p.addParseAction(cls.parse_action) + p.add_parse_action(cls.parse_action) @classmethod def parse_action(cls, toks): @@ -117,7 +117,7 @@ def parse_action(cls, toks): @classmethod def parse(cls, s): - return cls.parser.parseString(s)[0] + return cls.parser.parse_string(s)[0] def __repr__(self) -> str: return f"{type(self).__name__}: '{str(self)}'" From df980aad1ebab0a7563699dae5bdedf0781fa574 Mon Sep 17 00:00:00 2001 From: rettinghaus Date: Fri, 2 Jan 2026 19:04:50 +0100 
Subject: [PATCH 134/135] fix formatting with ruff --- edtf/parser/grammar.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 146f27b..0624a92 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -322,7 +322,9 @@ def f(toks): # (* *** L2 Season *** *) -seasonL2Number = one_of("21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41") +seasonL2Number = one_of( + "21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41" +) l2season = year + "-" + seasonL2Number("season") Level2Season.set_parser(l2season) From 603859e921fe7775c7702adc2819e9115d480426 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 9 Jan 2026 01:01:28 +0000 Subject: [PATCH 135/135] Update coverage on Readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index aa72a4c..206f4f4 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # python-edtf -Coverage
Coverage Report
FileStmtsMissCoverMissing
edtf
   __init__.py40100% 
   appsettings.py29293%12–13
   convert.py631182%11–19, 21, 72
   fields.py1191190%1, 3–8, 10–13, 15, 23, 29, 31, 33–35, 38–39, 51–58, 60, 63, 65–70, 72–76, 78–79, 81, 83–84, 86, 88–89, 91, 93–95, 97–98, 100, 102–105, 107, 109–112, 114, 123–125, 128, 131–132, 135–136, 139–140, 142–144, 147, 151, 153, 155, 157, 160–173, 179, 181–182, 184–185, 190–191, 193–194, 196, 204, 206, 208–209, 212–213, 223–226, 234
   jdutil.py984455%37, 55, 91–92, 287, 291, 314, 316–317, 319, 321, 346, 348, 350, 370–372, 374, 376, 378, 381–383, 385, 387, 389, 392–393, 395, 397, 399–400, 402, 405–407, 410–413, 415, 417, 424, 431
   tests.py119496%137–138, 148–149
   util.py330100% 
edtf/natlang
   __init__.py20100% 
   en.py1581193%85, 88, 145, 181–182, 192–193, 218–219, 223, 290
   tests.py10190%211
edtf/parser
   __init__.py40100% 
   edtf_exceptions.py15286%24, 29
   grammar.py133496%364, 367, 369, 375
   parser_classes.py66110683%114–116, 123, 126, 188, 194–198, 205–207, 214–218, 227–229, 234–240, 273, 345, 358–359, 390–394, 397, 412, 415–419, 422–426, 444–446, 474, 483, 546, 560, 564, 600, 608, 612, 659–660, 666, 684–685, 688, 694, 700, 702, 706, 713, 754, 779, 785, 789, 804, 808, 898, 908, 919–920, 922, 929, 932, 943, 948, 953, 989, 992, 998, 1003, 1005–1013, 1028, 1033, 1111, 1116, 1149
   tests.py89198%412
TOTAL153730580% 
+Coverage
Coverage Report
FileStmtsMissCoverMissing
edtf
   __init__.py40100% 
   appsettings.py29293%12–13
   convert.py631182%11–19, 21, 72
   fields.py1191190%1, 3–8, 10–13, 15, 23, 29, 31, 33–35, 38–39, 51–58, 60, 63, 65–70, 72–76, 78–79, 81, 83–84, 86, 88–89, 91, 93–95, 97–98, 100, 102–105, 107, 109–112, 114, 123–125, 128, 131–132, 135–136, 139–140, 142–144, 147, 151, 153, 155, 157, 160–173, 179, 181–182, 184–185, 190–191, 193–194, 196, 204, 206, 208–209, 212–213, 223–226, 234
   jdutil.py984455%37, 55, 91–92, 287, 291, 314, 316–317, 319, 321, 346, 348, 350, 370–372, 374, 376, 378, 381–383, 385, 387, 389, 392–393, 395, 397, 399–400, 402, 405–407, 410–413, 415, 417, 424, 431
   tests.py119496%137–138, 148–149
   util.py330100% 
edtf/natlang
   __init__.py20100% 
   en.py1581193%85, 88, 145, 181–182, 192–193, 218–219, 223, 290
   tests.py10190%211
edtf/parser
   __init__.py40100% 
   edtf_exceptions.py15286%24, 29
   grammar.py133496%365, 368, 370, 376
   parser_classes.py66110683%114–116, 123, 126, 188, 194–198, 205–207, 214–218, 227–229, 234–240, 273, 345, 358–359, 390–394, 397, 412, 415–419, 422–426, 444–446, 474, 483, 546, 560, 564, 600, 608, 612, 659–660, 666, 684–685, 688, 694, 700, 702, 706, 713, 754, 779, 785, 789, 804, 808, 898, 908, 919–920, 922, 929, 932, 943, 948, 953, 989, 992, 998, 1003, 1005–1013, 1028, 1033, 1111, 1116, 1149
   tests.py89198%412
TOTAL153730580% 
An implementation of EDTF format in Python, together with utility functions for parsing natural language date texts, and converting EDTF dates to related Python `date` or `struct_time` objects.