Skip to content

Commit 2dbe61e

Browse files
feat: speed up agenda.ics; cache more agenda data (#10362)
* chore(dev): cprofile middleware settings
* feat: precomputed agenda.ics (wip)
* feat: precomp filtering support
* fix: separately cache versioned hrefs

  Fixes #10355
* fix: versionless agenda href in agenda / ical
* fix: preserve RESCHEDULED output
* fix: fixup to agree with original output
* feat: separate agenda cache, cache old meetings
* feat: agenda refresh tasks
* chore: undo accidental commit
* chore: remove debug parameter
* fix: convert session ID to int for comparison
* test: update/fix tests, rename new task
* refactor: rename task to have _task suffix

  Also changes a log msg so it won't contain "None" awkwardly
* feat: no exceptions from agenda_data_refresh_task
* test: explanatory comment
* ci: agenda cache for k8s / testcrawl
1 parent 752c6e5 commit 2dbe61e

File tree

7 files changed: 396 additions and 73 deletions (+396 / -73 lines changed)

ietf/doc/models.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,14 @@ def revisions_by_newrevisionevent(self):
239239
return revisions
240240

241241
def get_href(self, meeting=None):
242-
return self._get_ref(meeting=meeting,meeting_doc_refs=settings.MEETING_DOC_HREFS)
242+
return self._get_ref(meeting=meeting, versioned=True)
243243

244244

245245
def get_versionless_href(self, meeting=None):
246-
return self._get_ref(meeting=meeting,meeting_doc_refs=settings.MEETING_DOC_GREFS)
246+
return self._get_ref(meeting=meeting, versioned=False)
247247

248248

249-
def _get_ref(self, meeting=None, meeting_doc_refs=settings.MEETING_DOC_HREFS):
249+
def _get_ref(self, meeting=None, versioned=True):
250250
"""
251251
Returns an url to the document text. This differs from .get_absolute_url(),
252252
which returns an url to the datatracker page for the document.
@@ -255,12 +255,16 @@ def _get_ref(self, meeting=None, meeting_doc_refs=settings.MEETING_DOC_HREFS):
255255
# the earlier resolution order, but there's at the moment one single
256256
# instance which matches this (with correct results), so we won't
257257
# break things all over the place.
258-
if not hasattr(self, '_cached_href'):
258+
cache_attr = "_cached_href" if versioned else "_cached_versionless_href"
259+
if not hasattr(self, cache_attr):
259260
validator = URLValidator()
260261
if self.external_url and self.external_url.split(':')[0] in validator.schemes:
261262
validator(self.external_url)
262263
return self.external_url
263264

265+
meeting_doc_refs = (
266+
settings.MEETING_DOC_HREFS if versioned else settings.MEETING_DOC_GREFS
267+
)
264268
if self.type_id in settings.DOC_HREFS and self.type_id in meeting_doc_refs:
265269
if self.meeting_related():
266270
self.is_meeting_related = True
@@ -312,8 +316,8 @@ def _get_ref(self, meeting=None, meeting_doc_refs=settings.MEETING_DOC_HREFS):
312316

313317
if href.startswith('/'):
314318
href = settings.IDTRACKER_BASE_URL + href
315-
self._cached_href = href
316-
return self._cached_href
319+
setattr(self, cache_attr, href)
320+
return getattr(self, cache_attr)
317321

318322
def set_state(self, state):
319323
"""Switch state type implicit in state to state. This just

ietf/meeting/tasks.py

Lines changed: 75 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1-
# Copyright The IETF Trust 2024-2025, All Rights Reserved
1+
# Copyright The IETF Trust 2024-2026, All Rights Reserved
22
#
33
# Celery task definitions
44
#
55
import datetime
66

7-
from celery import shared_task
8-
# from django.db.models import QuerySet
7+
from itertools import batched
8+
9+
from celery import shared_task, chain
10+
from django.db.models import IntegerField
11+
from django.db.models.functions import Cast
912
from django.utils import timezone
1013

1114
from ietf.utils import log
@@ -19,9 +22,56 @@
1922
from .utils import fetch_attendance_from_meetings
2023

2124

25+
@shared_task
26+
def agenda_data_refresh_task(num=None):
27+
"""Refresh agenda data for one plenary meeting
28+
29+
If `num` is `None`, refreshes data for the current meeting.
30+
"""
31+
log.log(
32+
f"Refreshing agenda data for {f"IETF-{num}" if num else "current IETF meeting"}"
33+
)
34+
try:
35+
generate_agenda_data(num, force_refresh=True)
36+
except Exception as err:
37+
# Log and swallow exceptions so failure on one meeting won't break a chain of
38+
# tasks. This is used by agenda_data_refresh_all_task().
39+
log.log(f"ERROR: Refreshing agenda data failed for num={num}: {err}")
40+
41+
2242
@shared_task
2343
def agenda_data_refresh():
24-
generate_agenda_data(force_refresh=True)
44+
"""Deprecated. Use agenda_data_refresh_task() instead.
45+
46+
TODO remove this after switching the periodic task to the new name
47+
"""
48+
log.log("Deprecated agenda_data_refresh task called!")
49+
agenda_data_refresh_task()
50+
51+
52+
@shared_task
53+
def agenda_data_refresh_all_task(*, batch_size=10):
54+
"""Refresh agenda data for all plenary meetings
55+
56+
Executes as a chain of tasks, each computing up to `batch_size` meetings
57+
in a single task.
58+
"""
59+
meeting_numbers = sorted(
60+
Meeting.objects.annotate(
61+
number_as_int=Cast("number", output_field=IntegerField())
62+
)
63+
.filter(type_id="ietf", number_as_int__gt=64)
64+
.values_list("number_as_int", flat=True)
65+
)
66+
# Batch using chained maps rather than celery.chunk so we only use one worker
67+
# at a time.
68+
batched_task_chain = chain(
69+
*(
70+
agenda_data_refresh_task.map(nums)
71+
for nums in batched(meeting_numbers, batch_size)
72+
)
73+
)
74+
batched_task_chain.delay()
2575

2676

2777
@shared_task
@@ -55,7 +105,9 @@ def proceedings_content_refresh_task(*, all=False):
55105
@shared_task
56106
def fetch_meeting_attendance_task():
57107
# fetch most recent two meetings
58-
meetings = Meeting.objects.filter(type="ietf", date__lte=timezone.now()).order_by("-date")[:2]
108+
meetings = Meeting.objects.filter(type="ietf", date__lte=timezone.now()).order_by(
109+
"-date"
110+
)[:2]
59111
try:
60112
stats = fetch_attendance_from_meetings(meetings)
61113
except RuntimeError as err:
@@ -64,16 +116,19 @@ def fetch_meeting_attendance_task():
64116
for meeting, meeting_stats in zip(meetings, stats):
65117
log.log(
66118
"Fetched data for meeting {:>3}: {:4d} created, {:4d} updated, {:4d} deleted, {:4d} processed".format(
67-
meeting.number, meeting_stats['created'], meeting_stats['updated'], meeting_stats['deleted'],
68-
meeting_stats['processed']
119+
meeting.number,
120+
meeting_stats["created"],
121+
meeting_stats["updated"],
122+
meeting_stats["deleted"],
123+
meeting_stats["processed"],
69124
)
70125
)
71126

72127

73128
def _select_meetings(
74129
meetings: list[str] | None = None,
75130
meetings_since: str | None = None,
76-
meetings_until: str | None = None
131+
meetings_until: str | None = None,
77132
): # nyah
78133
"""Select meetings by number or date range"""
79134
# IETF-1 = 1986-01-16
@@ -130,15 +185,15 @@ def _select_meetings(
130185
@shared_task
131186
def resolve_meeting_materials_task(
132187
*, # only allow kw arguments
133-
meetings: list[str] | None=None,
134-
meetings_since: str | None=None,
135-
meetings_until: str | None=None
188+
meetings: list[str] | None = None,
189+
meetings_since: str | None = None,
190+
meetings_until: str | None = None,
136191
):
137192
"""Run materials resolver on meetings
138-
193+
139194
Can request a set of meetings by number by passing a list in the meetings arg, or
140195
by range by passing an iso-format timestamps in meetings_since / meetings_until.
141-
To select all meetings, set meetings_since="zero" and omit other parameters.
196+
To select all meetings, set meetings_since="zero" and omit other parameters.
142197
"""
143198
meetings_qs = _select_meetings(meetings, meetings_since, meetings_until)
144199
for meeting in meetings_qs.order_by("date"):
@@ -155,21 +210,23 @@ def resolve_meeting_materials_task(
155210
f"meeting {meeting.number}: {err}"
156211
)
157212
else:
158-
log.log(f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds.")
213+
log.log(
214+
f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds."
215+
)
159216

160217

161218
@shared_task
162219
def store_meeting_materials_as_blobs_task(
163220
*, # only allow kw arguments
164221
meetings: list[str] | None = None,
165222
meetings_since: str | None = None,
166-
meetings_until: str | None = None
223+
meetings_until: str | None = None,
167224
):
168225
"""Push meeting materials into the blob store
169226
170227
Can request a set of meetings by number by passing a list in the meetings arg, or
171228
by range by passing an iso-format timestamps in meetings_since / meetings_until.
172-
To select all meetings, set meetings_since="zero" and omit other parameters.
229+
To select all meetings, set meetings_since="zero" and omit other parameters.
173230
"""
174231
meetings_qs = _select_meetings(meetings, meetings_since, meetings_until)
175232
for meeting in meetings_qs.order_by("date"):
@@ -187,4 +244,5 @@ def store_meeting_materials_as_blobs_task(
187244
)
188245
else:
189246
log.log(
190-
f"Blobs created in {(timezone.now() - mark).total_seconds():0.3f} seconds.")
247+
f"Blobs created in {(timezone.now() - mark).total_seconds():0.3f} seconds."
248+
)

ietf/meeting/tests_tasks.py

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,63 @@
55
from ietf.utils.test_utils import TestCase
66
from ietf.utils.timezone import date_today
77
from .factories import MeetingFactory
8-
from .tasks import proceedings_content_refresh_task, agenda_data_refresh
8+
from .tasks import (
9+
proceedings_content_refresh_task,
10+
agenda_data_refresh_task,
11+
agenda_data_refresh_all_task,
12+
)
913
from .tasks import fetch_meeting_attendance_task
1014

1115

1216
class TaskTests(TestCase):
1317
@patch("ietf.meeting.tasks.generate_agenda_data")
14-
def test_agenda_data_refresh(self, mock_generate):
15-
agenda_data_refresh()
18+
def test_agenda_data_refresh_task(self, mock_generate):
19+
agenda_data_refresh_task()
1620
self.assertTrue(mock_generate.called)
17-
self.assertEqual(mock_generate.call_args, call(force_refresh=True))
21+
self.assertEqual(mock_generate.call_args, call(None, force_refresh=True))
22+
23+
mock_generate.reset_mock()
24+
mock_generate.side_effect = RuntimeError
25+
try:
26+
agenda_data_refresh_task()
27+
except Exception as err:
28+
self.fail(
29+
f"agenda_data_refresh_task should not raise exceptions (got {repr(err)})"
30+
)
31+
32+
@patch("ietf.meeting.tasks.agenda_data_refresh_task")
33+
@patch("ietf.meeting.tasks.chain")
34+
def test_agenda_data_refresh_all_task(self, mock_chain, mock_agenda_data_refresh):
35+
# Patch the agenda_data_refresh_task task with a mock whose `.map` attribute
36+
# converts its argument, which is expected to be an iterator, to a list
37+
# and returns it. We'll use this to check that the expected task chain
38+
# was set up, but we don't actually run any celery tasks.
39+
mock_agenda_data_refresh.map.side_effect = lambda x: list(x)
40+
41+
meetings = MeetingFactory.create_batch(5, type_id="ietf")
42+
numbers = sorted(int(m.number) for m in meetings)
43+
agenda_data_refresh_all_task(batch_size=2)
44+
self.assertTrue(mock_chain.called)
45+
# The lists in the call() below are the output of the lambda we patched in
46+
# via mock_agenda_data_refresh.map.side_effect above. I.e., this tests that
47+
# map() was called with the correct batched data.
48+
self.assertEqual(
49+
mock_chain.call_args,
50+
call(
51+
[numbers[0], numbers[1]],
52+
[numbers[2], numbers[3]],
53+
[numbers[4]],
54+
),
55+
)
56+
self.assertEqual(mock_agenda_data_refresh.call_count, 0)
57+
self.assertEqual(mock_agenda_data_refresh.map.call_count, 3)
1858

1959
@patch("ietf.meeting.tasks.generate_proceedings_content")
2060
def test_proceedings_content_refresh_task(self, mock_generate):
2161
# Generate a couple of meetings
2262
meeting120 = MeetingFactory(type_id="ietf", number="120") # 24 * 5
2363
meeting127 = MeetingFactory(type_id="ietf", number="127") # 24 * 5 + 7
24-
64+
2565
# Times to be returned
2666
now_utc = datetime.datetime.now(tz=datetime.UTC)
2767
hour_00_utc = now_utc.replace(hour=0)
@@ -34,19 +74,19 @@ def test_proceedings_content_refresh_task(self, mock_generate):
3474
self.assertEqual(mock_generate.call_count, 1)
3575
self.assertEqual(mock_generate.call_args, call(meeting120, force_refresh=True))
3676
mock_generate.reset_mock()
37-
77+
3878
# hour 01 - should call no meetings
3979
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_01_utc):
4080
proceedings_content_refresh_task()
4181
self.assertEqual(mock_generate.call_count, 0)
42-
82+
4383
# hour 07 - should call meeting with number % 24 == 0
4484
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_07_utc):
4585
proceedings_content_refresh_task()
4686
self.assertEqual(mock_generate.call_count, 1)
4787
self.assertEqual(mock_generate.call_args, call(meeting127, force_refresh=True))
4888
mock_generate.reset_mock()
49-
89+
5090
# With all=True, all should be called regardless of time. Reuse hour_01_utc which called none before
5191
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_01_utc):
5292
proceedings_content_refresh_task(all=True)
@@ -61,10 +101,10 @@ def test_fetch_meeting_attendance_task(self, mock_fetch_attendance):
61101
MeetingFactory(type_id="ietf", date=today - datetime.timedelta(days=3)),
62102
]
63103
data = {
64-
'created': 1,
65-
'updated': 2,
66-
'deleted': 0,
67-
'processed': 3,
104+
"created": 1,
105+
"updated": 2,
106+
"deleted": 0,
107+
"processed": 3,
68108
}
69109

70110
mock_fetch_attendance.return_value = [data, data]

0 commit comments

Comments
 (0)