From 4a3de555df18497185a39054d5b40303e14d95f3 Mon Sep 17 00:00:00 2001
From: Leif Johansson
Date: Tue, 15 Sep 2020 10:57:55 +0200
Subject: [PATCH] lots of experiments

---
 examples/mdx.fd               | 17 ----------
 examples/mdx2.fd              | 64 +++++++++++++++++++++++++++++++++++
 setup.py                      |  2 +-
 src/pyff/api.py               |  2 +-
 src/pyff/builtins.py          | 13 ++++---
 src/pyff/decorators.py        | 25 ++++++++++++++
 src/pyff/pipes.py             |  2 ++
 src/pyff/resource.py          | 12 +++++++
 src/pyff/samlmd.py            | 11 +++---
 src/pyff/store.py             | 31 +++++++++++------
 src/pyff/test/__init__.py     |  3 +-
 src/pyff/test/test_md_main.py |  3 +-
 src/pyff/utils.py             | 24 +++++++++----
 13 files changed, 160 insertions(+), 49 deletions(-)
 create mode 100644 examples/mdx2.fd

diff --git a/examples/mdx.fd b/examples/mdx.fd
index 8a79a426..987cf71a 100644
--- a/examples/mdx.fd
+++ b/examples/mdx.fd
@@ -11,23 +11,6 @@
         - examples/links.xrd as links via normalize
         - http://mds.edugain.org via edugain
     - select
-    - fork and merge:
-        - select: "!//md:EntityDescriptor[md:Extensions[mdrpi:RegistrationInfo[@registrationAuthority='http://www.swamid.se/']]]"
-        - setattr:
-            http://pyff.io/collection: swamid-2.0
-    - fork and merge:
-        - select:
-            - "http://pyff.io/collection=swamid-2.0+http://pyff.io/role=idp"
-        - setattr:
-            http://pyff.io/collection: swamid-idp
-    - fork:
-        - select as /md/foo-2.0:
-            - http://pyff.io/collection=swamid-2.0
-        - break
-    - fork:
-        - select as /md/skolfederation-2.0:
-            - http://meta01.skolfederation.se/skolfederation-2_0.xml
-        - break
     - fork:
         - select as /ndn:
             - https://idp.nordu.net/idp/shibboleth
diff --git a/examples/mdx2.fd b/examples/mdx2.fd
new file mode 100644
index 00000000..8a79a426
--- /dev/null
+++ b/examples/mdx2.fd
@@ -0,0 +1,64 @@
+- when normalize:
+    - xslt:
+        stylesheet: tidy.xsl
+    - break
+- when edugain:
+    - xslt:
+        stylesheet: tidy.xsl
+    - break
+- when update:
+    - load:
+        - examples/links.xrd as links via normalize
+        - http://mds.edugain.org via edugain
+    - select
+    - fork and merge:
+        - select: "!//md:EntityDescriptor[md:Extensions[mdrpi:RegistrationInfo[@registrationAuthority='http://www.swamid.se/']]]"
+        - setattr:
+            http://pyff.io/collection: swamid-2.0
+    - fork and merge:
+        - select:
+            - "http://pyff.io/collection=swamid-2.0+http://pyff.io/role=idp"
+        - setattr:
+            http://pyff.io/collection: swamid-idp
+    - fork:
+        - select as /md/foo-2.0:
+            - http://pyff.io/collection=swamid-2.0
+        - break
+    - fork:
+        - select as /md/skolfederation-2.0:
+            - http://meta01.skolfederation.se/skolfederation-2_0.xml
+        - break
+    - fork:
+        - select as /ndn:
+            - https://idp.nordu.net/idp/shibboleth
+        - break
+    - fork:
+        - select as /md/swamid-kalmar-1.0:
+            - https://connect.sunet.se/shibboleth
+            - https://www.diva-portal.org/shibboleth
+        - break
+- when request:
+    - select
+    - pipe:
+        - when accept application/samlmetadata+xml application/xml:
+            - pipe:
+                - when path /md/swamid-kalmar-1.0:
+                    - xslt:
+                        stylesheet: kalmar2.xsl
+                    - break
+
+            - xslt:
+                stylesheet: tidy.xsl
+            - first
+            - finalize:
+                cacheDuration: PT5H
+                validUntil: P10D
+            - sign:
+                key: sign.key
+                cert: sign.crt
+            - emit application/samlmetadata+xml
+            - break
+        - when accept application/json:
+            - discojson
+            - emit application/json
+            - break
diff --git a/setup.py b/setup.py
index 7600011a..e155867b 100755
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
     'six>=1.11.0',
     'ipaddr',
     'publicsuffix2',
-    'redis-collections==0.7.0',
+    'redis-collections==0.8.1',
     'redis',
     'requests',
     'requests_cache',
diff --git a/src/pyff/api.py b/src/pyff/api.py
index 8b418b57..e163888f 100644
--- a/src/pyff/api.py
+++ b/src/pyff/api.py
@@ -89,7 +89,7 @@ def _fmt(data, accepter):


 def call(entry):
-    requests.post('{}/api/call/{}'.format(config.base_url, entry))
+    return requests.post('{}/api/call/{}'.format(config.base_url, entry))


 def process_handler(request):
diff --git a/src/pyff/builtins.py b/src/pyff/builtins.py
index faf4bd2a..ec367e0f 100644
--- a/src/pyff/builtins.py
+++ b/src/pyff/builtins.py
@@ -22,7 +22,7 @@
 from .logs import get_log
 from .pipes import Plumbing, PipeException, PipelineCallback, pipe
 from .utils import total_seconds, dumptree, safe_write, root, with_tree, duration2timedelta, xslt_transform, \
-    validate_document, hash_id, ensure_dir
+    validate_document, hash_id, dup_tree
 from .samlmd import sort_entities, iter_entities, annotate_entity, set_entity_attributes, \
     discojson_t, set_pubinfo, set_reginfo, find_in_document, entitiesdescriptor, set_nodecountry, resolve_entities
 from six.moves.urllib_parse import urlparse
@@ -211,9 +211,10 @@ def fork(req, *opts):
             attribute: value

-    Note that unless you have a select statement before your fork merge you'll be merging into an empty
+    Note that unless you have a select statement before your fork+merge you'll be merging into an empty
     active document which with the default merge strategy of replace_existing will result in an empty
-    active document. To avoid this do a select before your fork, thus:
+    active document because it is the entities of the parent document that are used to determine which
+    entities are touched by the merge. To avoid this do a select before your fork, thus:

     .. code-block:: yaml

         - select
         - fork and merge:
             ...

@@ -226,7 +227,7 @@
     """
     nt = None
     if req.t is not None:
-        nt = deepcopy(req.t)
+        nt = dup_tree(req.t)  # was: deepcopy(req.t)

     ip = Plumbing(pipeline=req.args, pid="%s.fork" % req.plumbing.pid)
     ireq = Plumbing.Request(ip, req.md, t=nt, scheduler=req.scheduler)
@@ -241,6 +242,10 @@
             sn = opts[-1]
         req.md.store.merge(req.t, ireq.t, strategy_name=sn)

+    nt = None
+    ip = None
+    ireq = None
+
     return req.t
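
The fork() change above replaces copy.deepcopy with dup_tree, which is added
to src/pyff/utils.py further down in this patch. A minimal sketch of the idea
(copy_tree is an illustrative name; the rationale, keeping the forked copy
from holding references into the parent document, is inferred from the
heap-profiling experiments elsewhere in this patch):

    from io import BytesIO
    from lxml import etree

    def copy_tree(t):
        # Serialize and re-parse: the new tree shares no underlying
        # structure with the original, unlike copy.deepcopy on lxml trees.
        return etree.parse(BytesIO(etree.tostring(t)))
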
diff --git a/src/pyff/decorators.py b/src/pyff/decorators.py
index 3e46fe00..b4648f85 100644
--- a/src/pyff/decorators.py
+++ b/src/pyff/decorators.py
@@ -11,6 +11,31 @@
 log = get_log(__name__)


+def heapy(trace=False, minsize=0):
+    def decorating(func):
+        def new_func(*args, **kwargs):
+            from gc import collect
+            collect()  # settle the heap before taking a baseline
+            from guppy import hpy
+            hp = hpy()
+            hp.setrelheap()  # measure heap growth relative to this point
+            r = func(*args, **kwargs)
+            collect()
+            after = hp.heap()  # everything retained since setrelheap()
+            print("-----------------------------")
+            print(args)
+            print(kwargs)
+            print(after)
+            print("+++++++++++++++++++++++++++++")
+            if trace and after.size > minsize:
+                import pdb
+                pdb.set_trace()
+
+            return r
+        return new_func
+    return decorating
+
+
 def deprecated(logger=log, reason="Complain to the developer about unspecified code deprecation"):
     """This is a decorator which can be used to mark functions
     as deprecated. It will result in a warning being emitted
diff --git a/src/pyff/pipes.py b/src/pyff/pipes.py
index bbcda557..985990c8 100644
--- a/src/pyff/pipes.py
+++ b/src/pyff/pipes.py
@@ -7,6 +7,7 @@
 import os
 import yaml
 from .utils import resource_string, PyffException, is_text
+from .decorators import heapy
 from .logs import get_log

 log = get_log(__name__)
@@ -252,6 +253,7 @@
         """
         return pl.iprocess(self)

+    # @heapy(trace=True, minsize=100000000)
     def iprocess(self, req):
         """The inner request pipeline processor.
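
The new heapy decorator brackets a call with gc.collect(), sets a relative
heap baseline via guppy, prints whatever the call retained, and optionally
drops into pdb when the retained size exceeds minsize. A hypothetical usage
sketch (load_feed is a stand-in name; requires the guppy package, packaged
as guppy3 on Python 3):

    from pyff.decorators import heapy

    @heapy(trace=False, minsize=0)
    def load_feed(url):
        ...  # any call whose heap growth you want printed

As the commented-out @heapy on Plumbing.iprocess above suggests, the intent
is to keep the instrumentation available but disabled by default.
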
diff --git a/src/pyff/resource.py b/src/pyff/resource.py
index fd6e133d..dadff15f 100644
--- a/src/pyff/resource.py
+++ b/src/pyff/resource.py
@@ -181,6 +181,8 @@ def __str__(self):
         return "Resource {} expires at {} using ".format(self.url if self.url is not None else "(root)",
                                                          self.expire_time) + \
             ",".join(["{}={}".format(k, v) for k, v in list(self.opts.items())])

+    from .decorators import heapy
+    @heapy(trace=False)
     def reload(self, fail_on_error=False):
         with non_blocking_lock(self.lock):
             if fail_on_error:
@@ -198,6 +200,16 @@
                 rp.fetcher.join()

         self.notify()
+        # import objgraph
+
+        # import gc
+        # gc.collect()
+        # x = objgraph.get_leaking_objects()
+        # log.debug([type(o) for o in x])
+        # import threading
+        # log.debug(threading.enumerate())
+        # import pdb
+        # pdb.set_trace()

     def __len__(self):
         return len(self.children)
diff --git a/src/pyff/samlmd.py b/src/pyff/samlmd.py
index 82d92358..25e79f21 100644
--- a/src/pyff/samlmd.py
+++ b/src/pyff/samlmd.py
@@ -27,7 +27,7 @@ def __init__(self, initial=None):
                 self.add(e)

     def add(self, value):
-        self._e[value.get('entityID')] = value
+        self._e[value.get('entityID')] = True

     def discard(self, value):
         entity_id = value.get('entityID')
@@ -35,7 +35,7 @@ def discard(self, value):
             del self._e[entity_id]

     def __iter__(self):
-        for e in list(self._e.values()):
+        for e in list(self._e.keys()):
             yield e

     def __len__(self):
@@ -80,10 +80,9 @@ def parse_saml_metadata(source,
     try:
         t = parse_xml(source, base_url=base_url)
         t.xinclude()
-
-        expire_time_offset = metadata_expiration(t)
-
         t = check_signature(t, key)
+
+        expire_time_offset = metadata_expiration(t)

         if cleanup is not None and isinstance(cleanup, list):
             for cb in cleanup:
@@ -1090,4 +1089,4 @@ def _set_nodecountry_in_ext(ext_elt, iso_cc):
 def diff(t1, t2):
     s1 = set([e.get('entityID') for e in iter_entities(root(t1))])
     s2 = set([e.get('entityID') for e in iter_entities(root(t2))])
-    return s1.difference(s2)
\ No newline at end of file
+    return s1.difference(s2)
diff --git a/src/pyff/store.py b/src/pyff/store.py
index a02e33ba..bcb08d3f 100644
--- a/src/pyff/store.py
+++ b/src/pyff/store.py
@@ -22,7 +22,7 @@
 from .samlmd import EntitySet, iter_entities, entity_attribute_dict, is_sp, is_idp, entity_simple_info, \
     object_id, find_merge_strategy, find_entity, entity_simple_summary, entitiesdescriptor, discojson, entity_icon_url
 from .utils import root, hash_id, avg_domain_distance, load_callable, is_text, b2u, parse_xml, dumptree, \
-    LRUProxyDict, hex_digest, redis, is_past_ttl
+    LRUProxyDict, hex_digest, redis, is_past_ttl, dup_tree

 import os
 import shutil
@@ -801,9 +801,11 @@ def attribute(self, a):
         return list(self.index.setdefault('attr', {}).setdefault(a, {}).keys())

     def _modify(self, entity, modifier):
+        entity_id = entity.get('entityID')

         def _m(idx, vv):
-            getattr(idx.setdefault(vv, EntitySet()), modifier)(entity)
+            value_set = idx.setdefault(vv, set())
+            getattr(value_set, modifier)(entity_id)

         for hn in DINDEX:
             _m(self.index[hn], hash_id(entity, hn, False))
@@ -826,21 +828,28 @@
     def _unindex(self, entity):
         return self._modify(entity, "discard")

+    def _resolve_one(self, entity_id):
+        return root(parse_xml(BytesIO(self.entities[entity_id])))
+
+    def _resolve(self, entity_ids):
+        for entity_id in entity_ids:
+            yield self._resolve_one(entity_id)
+
     def _get_index(self, a, v):
         if a in DINDEX:
             return self.index[a].get(v, [])
         else:
             idx = self.index['attr'].setdefault(a, {})
-            entities = idx.get(v, None)
-            if entities is not None:
-                return entities
+            entity_ids = idx.get(v, None)
+            if entity_ids is not None:
+                return self._resolve(entity_ids)
             else:
                 m = re.compile(v)
-                entities = []
+                entity_ids = set()
                 for value, ents in list(idx.items()):
                     if m.match(value):
-                        entities.extend(ents)
-                return entities
+                        entity_ids.update(ents)
+                return self._resolve(entity_ids)

     def reset(self):
         self.__init__()
@@ -854,7 +863,7 @@ def update(self, t, tid=None, etag=None, lazy=True):
         if relt.tag == "{%s}EntityDescriptor" % NS['md']:
             self._unindex(relt)
             self._index(relt)
-            self.entities[relt.get('entityID')] = relt  # TODO: merge?
+            self.entities[relt.get('entityID')] = dumptree(relt)  # TODO: merge?
             if tid is not None:
                 self.md[tid] = [relt.get('entityID')]
         elif relt.tag == "{%s}EntitiesDescriptor" % NS['md']:
@@ -871,10 +880,10 @@ def lookup(self, key):

     def _lookup(self, key):
         if key == 'entities' or key is None:
-            return list(self.entities.values())
+            return [self._resolve_one(entity_id) for entity_id in self.entities.keys()]

         if key in self.entities:
-            return [self.entities[key]]
+            return [self._resolve_one(key)]

         if '+' in key:
             key = key.strip('+')
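
Taken together, the store hunks above change the in-memory store to keep each
EntityDescriptor as serialized XML and to index plain entityID strings,
re-parsing entities on demand via _resolve_one/_resolve. A standalone sketch
of that round-trip (the dict and helper names are illustrative):

    from io import BytesIO
    from lxml import etree

    entities = {}

    def remember(e):
        # keep bytes rather than the live Element
        entities[e.get('entityID')] = etree.tostring(e)

    def resolve(entity_id):
        # re-parse on every lookup: more CPU per request, but the large
        # source documents can be garbage-collected in between
        return etree.parse(BytesIO(entities[entity_id])).getroot()
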
diff --git a/src/pyff/test/__init__.py b/src/pyff/test/__init__.py
index ff3970c8..62660122 100644
--- a/src/pyff/test/__init__.py
+++ b/src/pyff/test/__init__.py
@@ -31,6 +31,7 @@ def find_unbound_port(i=0):
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         try:
             sock.bind(("127.0.0.1", port))
+            sock.close()  # release the probe socket so the port is free again
         except socket.error:
             pass
     return port
@@ -144,4 +145,4 @@ def tearDownClass(cls):
         if os.path.exists(cls.private_keyspec):
             os.unlink(cls.private_keyspec)
         if os.path.exists(cls.public_keyspec):
-            os.unlink(cls.public_keyspec)
\ No newline at end of file
+            os.unlink(cls.public_keyspec)
diff --git a/src/pyff/test/test_md_main.py b/src/pyff/test/test_md_main.py
index 876307b9..fa0e4a46 100644
--- a/src/pyff/test/test_md_main.py
+++ b/src/pyff/test/test_md_main.py
@@ -65,7 +65,8 @@ def setUpClass(cls):
         for i in range(0, 60):
             try:
                 r = requests.get("http://127.0.0.1:%s/status" % cls.port)
-                if r.json() and 'running' in r.json()['status']:
+                j = r.json()
+                if j and 'running' in j['status']:
                     return
                 print(r.json())
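
The find_unbound_port tweak above closes the probe socket so the port is
actually free again when the test server binds it. A common alternative
(a sketch, not what this patch does) is to bind to port 0 and let the
kernel pick a free ephemeral port:

    import socket

    def find_free_port():
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind(("127.0.0.1", 0))
            return sock.getsockname()[1]
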
diff --git a/src/pyff/utils.py b/src/pyff/utils.py
index 65b10932..991f4733 100644
--- a/src/pyff/utils.py
+++ b/src/pyff/utils.py
@@ -64,7 +64,7 @@
     Image = None


-etree.set_default_parser(etree.XMLParser(resolve_entities=False))
+# etree.set_default_parser(etree.XMLParser(resolve_entities=False))

 __author__ = 'leifj'

@@ -168,6 +168,9 @@
 Return a string representation of the tree, optionally pretty_print(ed) (default False)

 :param t: An ElemenTree to serialize
+:param pretty_print: Should we pretty print the output (default False)
+:param method: The serialization method to pass to etree.tostring (default 'xml')
+:param xml_declaration: Should we include an ?xml declaration (default True)
 """
     return etree.tostring(t, encoding='UTF-8', method=method, xml_declaration=xml_declaration, pretty_print=pretty_print)
@@ -694,6 +697,10 @@ def close(self):
         pass


+def dup_tree(t, base_url=None):
+    return parse_xml(six.BytesIO(dumptree(t)), base_url=base_url)
+
+
 def url_get(url):
     """
     Download an URL using a cache and return the response object
     """
@@ -711,12 +718,15 @@
     else:
         retry = Retry(total=3, backoff_factor=0.5)
         adapter = HTTPAdapter(max_retries=retry)
-    s = CachedSession(cache_name="pyff_cache",
-                      backend=config.request_cache_backend,
-                      expire_after=config.request_cache_time,
-                      old_data_on_error=True)
-    s.mount('http://', adapter)
-    s.mount('https://', adapter)
+    if config.caching_enabled:
+        s = CachedSession(cache_name="pyff_cache",
+                          backend=config.request_cache_backend,
+                          expire_after=config.request_cache_time,
+                          old_data_on_error=True)
+        s.mount('http://', adapter)
+        s.mount('https://', adapter)
+    else:
+        s = requests.session()
     headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
     try:
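
url_get now only builds a CachedSession when config.caching_enabled is set
(a flag this patch assumes exists in pyff's config) and falls back to a plain
requests session otherwise. The same pattern in isolation (the expire_after
value is illustrative):

    import requests
    from requests_cache import CachedSession

    def make_session(caching_enabled):
        if caching_enabled:
            return CachedSession(cache_name="pyff_cache", expire_after=300)
        return requests.session()

Note that the fallback branch above does not mount the retry adapter that is
still constructed before it; whether that is intended is not clear from the
patch.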