From 5640504ace97b17f43d942ac94c7525b50558a2e Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Tue, 16 Dec 2025 13:52:13 +0530 Subject: [PATCH 01/15] chore: updated app_sdk version --- shuffle-tools/1.2.0/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shuffle-tools/1.2.0/requirements.txt b/shuffle-tools/1.2.0/requirements.txt index 89ca3320..f0d535ba 100644 --- a/shuffle-tools/1.2.0/requirements.txt +++ b/shuffle-tools/1.2.0/requirements.txt @@ -8,5 +8,4 @@ json2xml==5.0.5 ipaddress==1.0.23 google.auth==2.37.0 paramiko==3.5.0 -shuffle-sdk - +shuffle-sdk==0.0.31 From 88c3aa96c6b312ed22103208d926eab1669a5cb4 Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Tue, 16 Dec 2025 14:15:09 +0530 Subject: [PATCH 02/15] fix: version conflict --- shuffle-tools/1.2.0/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shuffle-tools/1.2.0/requirements.txt b/shuffle-tools/1.2.0/requirements.txt index f0d535ba..950edceb 100644 --- a/shuffle-tools/1.2.0/requirements.txt +++ b/shuffle-tools/1.2.0/requirements.txt @@ -2,7 +2,7 @@ ioc_finder==7.3.0 py7zr==0.22.0 rarfile==4.2 pyminizip==0.2.6 -requests==2.32.4 +requests==2.32.3 xmltodict==0.14.2 json2xml==5.0.5 ipaddress==1.0.23 From 20f1a53774ef37479dcee73fdffd139a19085f50 Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Tue, 16 Dec 2025 14:30:16 +0530 Subject: [PATCH 03/15] fix: added missing packages --- shuffle-tools/1.2.0/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shuffle-tools/1.2.0/Dockerfile b/shuffle-tools/1.2.0/Dockerfile index 40675132..fac3aae2 100644 --- a/shuffle-tools/1.2.0/Dockerfile +++ b/shuffle-tools/1.2.0/Dockerfile @@ -5,7 +5,7 @@ FROM frikky/shuffle:app_sdk as base FROM base as builder # Install all alpine build tools needed for our pip installs -RUN apk --no-cache add --update alpine-sdk libffi libffi-dev musl-dev openssl-dev git +RUN apk --no-cache add --update alpine-sdk libffi libffi-dev musl-dev openssl-dev git zlib-dev python3-dev # Install all of our pip packages in a single directory that we can copy to our base image later RUN mkdir /install From 4127de12f23ec823f966a9f021c900887a80af6b Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Tue, 16 Dec 2025 15:07:20 +0530 Subject: [PATCH 04/15] fix: remove the pyzipper lib --- shuffle-tools/1.2.0/requirements.txt | 2 +- shuffle-tools/1.2.0/src/app.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/shuffle-tools/1.2.0/requirements.txt b/shuffle-tools/1.2.0/requirements.txt index 950edceb..4fe45011 100644 --- a/shuffle-tools/1.2.0/requirements.txt +++ b/shuffle-tools/1.2.0/requirements.txt @@ -1,7 +1,7 @@ ioc_finder==7.3.0 py7zr==0.22.0 rarfile==4.2 -pyminizip==0.2.6 +pyzipper==0.3.6 requests==2.32.3 xmltodict==0.14.2 json2xml==5.0.5 diff --git a/shuffle-tools/1.2.0/src/app.py b/shuffle-tools/1.2.0/src/app.py index 83949ade..a50e405b 100644 --- a/shuffle-tools/1.2.0/src/app.py +++ b/shuffle-tools/1.2.0/src/app.py @@ -30,7 +30,7 @@ from google.auth import jwt import py7zr -import pyminizip +import pyzipper import rarfile import requests import tarfile @@ -1444,9 +1444,19 @@ def create_archive(self, file_ids, fileformat, name, password=None): if fileformat == "zip": archive_name = "archive.zip" if not name else name - pyminizip.compress_multiple( - paths, [], archive.name, password, 5 - ) + + pwd = password if isinstance(password, (bytes, bytearray)) else password.encode() + + with pyzipper.AESZipFile( + archive.name, + "w", + 
compression=pyzipper.ZIP_DEFLATED + ) as zf: + zf.setpassword(pwd) + zf.setencryption(pyzipper.WZ_AES, nbits=256) + + for path in paths: + zf.write(path, arcname=os.path.basename(path)) elif fileformat == "7zip": archive_name = "archive.7z" if not name else name From f7940e2d56acc2b170cf5fb3bf415ff01abd520e Mon Sep 17 00:00:00 2001 From: Frikky Date: Thu, 18 Dec 2025 14:42:34 +0100 Subject: [PATCH 05/15] Changed cache names to datastore. Retaining functions for backwards compatibility --- shuffle-tools/1.2.0/api.yaml | 185 ++++++++++++++++++++++++++------- shuffle-tools/1.2.0/src/app.py | 14 ++- 2 files changed, 158 insertions(+), 41 deletions(-) diff --git a/shuffle-tools/1.2.0/api.yaml b/shuffle-tools/1.2.0/api.yaml index 301b9f51..9fdb9cf8 100644 --- a/shuffle-tools/1.2.0/api.yaml +++ b/shuffle-tools/1.2.0/api.yaml @@ -61,41 +61,7 @@ actions: schema: type: string - - name: check_cache_contains - description: Checks Shuffle cache whether a user-provided key contains a value. Returns ALL the values previously appended. - parameters: - - name: key - description: The key to get - required: true - multiline: false - example: "alert_ids" - schema: - type: string - - name: value - description: The value to check for and append if applicable - required: true - multiline: false - example: "1208301599081" - schema: - type: string - - name: append - description: Whether to auto-append the value if it doesn't exist in the cache - required: true - options: - - true - - false - multiline: false - example: "timestamp" - schema: - type: string - - name: category - description: The category to get the value from. Not required. - required: false - multiline: false - example: "tickets" - schema: - type: string - - name: get_cache_value + - name: get_datastore_value description: Get a value saved to your organization in Shuffle parameters: - name: key @@ -115,7 +81,7 @@ actions: returns: schema: type: string - - name: set_cache_value + - name: set_datastore_value description: Set a value to be saved to your organization in Shuffle. parameters: - name: key @@ -142,7 +108,8 @@ actions: returns: schema: type: string - - name: delete_cache_value + + - name: delete_datastore_value description: Delete a value saved to your organization in Shuffle parameters: - name: key @@ -569,6 +536,13 @@ actions: example: "EventID,username\n4137,frikky" schema: type: string + - name: category + description: The category the file belongs to + required: false + multiline: false + example: "yara-rules" + schema: + type: string - name: download_remote_file description: Downloads a file from a URL parameters: @@ -1269,6 +1243,143 @@ actions: returns: schema: type: string + + - name: check_datastore_contains + description: We recommend "Search datastore category" instead. Checks Shuffle datastore whether a user-provided key contains a value. Returns ALL the values previously appended. + parameters: + - name: key + description: The key to get + required: true + multiline: false + example: "alert_ids" + schema: + type: string + - name: value + description: The value to check for and append if applicable + required: true + multiline: false + example: "1208301599081" + schema: + type: string + - name: append + description: Whether to auto-append the value if it doesn't exist in the cache + required: true + options: + - true + - false + multiline: false + example: "timestamp" + schema: + type: string + - name: category + description: The category to get the value from. Not required. 
+ required: false + multiline: false + example: "tickets" + schema: + type: string + - name: get_cache_value + description: Get a value saved to your organization in Shuffle. Deprecated for "get_datastore_value" + parameters: + - name: key + description: The key to get + required: true + multiline: false + example: "timestamp" + schema: + type: string + - name: category + description: The category to get the value from. Not required. + required: false + multiline: false + example: "tickets" + schema: + type: string + returns: + schema: + type: string + - name: delete_cache_value + description: Delete a value saved to your organization in Shuffle. Deprecated for "delete_datastore_value" + parameters: + - name: key + description: The key to delete + required: true + multiline: false + example: "timestamp" + schema: + type: string + - name: category + description: The category to get the value from. Not required. + required: false + multiline: false + example: "tickets" + schema: + type: string + returns: + schema: + type: string + + - name: set_cache_value + description: Set a value to be saved to your organization in Shuffle. Deprecated for "set_datastore_value" + parameters: + - name: key + description: The key to set the value for + required: true + multiline: false + example: "timestamp" + schema: + type: string + - name: value + description: The value to set + required: true + multiline: true + example: "1621959545" + schema: + type: string + - name: category + description: The category to get the value from. Not required. + required: false + multiline: false + example: "tickets" + schema: + type: string + returns: + schema: + type: string + - name: check_cache_contains + description: Checks Shuffle cache whether a user-provided key contains a value. Returns ALL the values previously appended. Deprecated for "check datastore contains" + parameters: + - name: key + description: The key to get + required: true + multiline: false + example: "alert_ids" + schema: + type: string + - name: value + description: The value to check for and append if applicable + required: true + multiline: false + example: "1208301599081" + schema: + type: string + - name: append + description: Whether to auto-append the value if it doesn't exist in the cache + required: true + options: + - true + - false + multiline: false + example: "timestamp" + schema: + type: string + - name: category + description: The category to get the value from. Not required. 
+ required: false + multiline: false + example: "tickets" + schema: + type: string #- name: parse_ioc_new # description: Parse IOC's based on https://github.com/fhightower/ioc-finder # parameters: diff --git a/shuffle-tools/1.2.0/src/app.py b/shuffle-tools/1.2.0/src/app.py index 83949ade..284450a1 100644 --- a/shuffle-tools/1.2.0/src/app.py +++ b/shuffle-tools/1.2.0/src/app.py @@ -1125,7 +1125,7 @@ def delete_file(self, file_id): ) return ret.text - def create_file(self, filename, data): + def create_file(self, filename, data, category=""): try: if str(data).startswith("b'") and str(data).endswith("'"): data = data[2:-1] @@ -1144,6 +1144,7 @@ def create_file(self, filename, data): filedata = { "filename": filename, "data": data, + "namespace": category, } fileret = self.set_files([filedata]) @@ -1158,8 +1159,9 @@ def list_file_category_ids(self, file_category): return self.get_file_category_ids(file_category) # Input is WAS a file, hence it didn't get the files - def get_file_value(self, filedata): - filedata = self.get_file(filedata) + # Category doesn't matter as it uses file ID, which is unique anyway + def get_file_value(self, filedata, category=""): + filedata = self.get_file(filedata, category) if filedata is None: return { "success": False, @@ -1190,7 +1192,7 @@ def get_file_value(self, filedata): "size": len(filedata["data"]), } - def download_remote_file(self, url, custom_filename=""): + def download_remote_file(self, url, custom_filename="", category=""): ret = requests.get(url, verify=False) # nosec filename = url.split("/")[-1] if "?" in filename: @@ -1204,6 +1206,7 @@ def download_remote_file(self, url, custom_filename=""): { "filename": filename, "data": ret.content, + "namespace": category, } ] ) @@ -1827,6 +1830,9 @@ def escape_html(self, input_data): result = markupsafe.escape(mapping) return mapping + def check_datastore_contains(self, key, value, append, category=""): + return check_cache_contains(self, key, value, append, category) + def check_cache_contains(self, key, value, append, category=""): org_id = self.full_execution["workflow"]["execution_org"]["id"] url = "%s/api/v1/orgs/%s/get_cache" % (self.url, org_id) From b7d9a2f7146892443e9766ba5510f8c61d1517c3 Mon Sep 17 00:00:00 2001 From: Frikky Date: Thu, 18 Dec 2025 15:30:20 +0100 Subject: [PATCH 06/15] Added a get_ioc function to shuffle tools that usees datastore locally --- shuffle-tools/1.2.0/api.yaml | 22 +++++++++++++++ shuffle-tools/1.2.0/src/app.py | 50 +++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/shuffle-tools/1.2.0/api.yaml b/shuffle-tools/1.2.0/api.yaml index 9fdb9cf8..ebbf7c3e 100644 --- a/shuffle-tools/1.2.0/api.yaml +++ b/shuffle-tools/1.2.0/api.yaml @@ -156,6 +156,7 @@ actions: returns: schema: type: string + #- name: send_email_shuffle # description: Send an email from Shuffle # parameters: @@ -1400,6 +1401,27 @@ actions: # returns: # schema: # type: string + # + - name: get_ioc + description: Get IOC's saved to your organization in Shuffle + parameters: + - name: ioc + description: The IOC to look for in Shuffle's datastore + required: true + multiline: true + example: "timestamp" + schema: + type: string + - name: data_type + description: The data type to get the IOC from. Discovered if not passed. 
+ required: false + multiline: false + example: "ip" + schema: + type: string + returns: + schema: + type: string large_image: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAK4AAACuCAYAAACvDDbuAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAABmJLR0QA/wD/AP+gvaeTAAAAB3RJTUUH5AgXDjM6hEZGWwAAD+lJREFUeNrtXb/vJTcRH7/v3iVBCqRBiCAQAtHwq4AWRElHwX8AoqbmXwDRpiH/QyQkGoogUSAhKIKUAE1IdSRSREhQQk7c3XtD8X55vePxjNfe3bk3H+nu+96uPf54POtnj8fe8OQX30JwOIxhtzYBh6MGOsPF0z9p2iWwpd8LjX6W5vWUYaiqlBuvLT5b5TQDPlRwmMSAABBg+kCer+XuAeQf4tL9tAxJ/hIfZGSm8rhyEfjytfxr9FeSX+KjvVfipNVpWlaPNhsAEPCS7Ao8FYnRlbO4ksLnjiSQvIanv4FNjwJ5pXIlMq6MQpIqqPnQKQKbjuPDtZlG55o6UHXWtVncZZTbbNBVB1P5dJYguCbJJ1WjOG8PVOioSm5HPrVt1rwuyN+K+PSZnNV1M/MmEFubfFjjU9tmK9XBJ2cOk3DDdZiEG67DJOrGuA7HyvAe12ESAxa73KPrN1z8gUikCCdvcD5NXnpQpA8nNhh9m5Yn4ZMrV8dHV/8a/dRA0x419a3lI9GBtM2GcrGYFXRNUU5TyluTOpdXwqeUt6YOpby9DUTLZylOcRlzdBTf2yV3ZBFOmKSHQh5KpjSSSpqG4s6VkUubqw8W8knTSnWk0Y+2jF5tlmuDUloJn6T8gRVcEpJ+3srChHSNt8RJsq4p+S41LC13KTcu/RJt1pLPKY1Pzhwm4YbrMAk3XIdJTMe4aeCjJhBVk0YiQ1MWZHhLgmO5QNVWfKRlavlIIQnurQmcnaMjSbBxhtMwYUxODpLcl2tUhvPlNE6VkiuoFVLXKT6ZfBjxRIIzOSlgWpLSB8uZ0g3BjeVDlFGEos0mfKKL7CQrY2ES7pM2i/OX22w4/sWReEhEnUOTxx3a+FrawQGZh04/rWe6oJBKo5zT4zLjPHE9ZHym5YzToogzfQcmfLgOhuLF/Sjm2izVDyXnrKtcmmmdaKumf+RyCw5Xn7OmzQaJF0fiEZG6BjXpYUYaSVkaPrXeHe4eVaZEr3Prqrmmrbc2T8lrmOMjn5xJHeJLYkk+PfzNTxOflrwF0EeHbU0Zt2wsW+PTkncB7g5zmMSwzUfS4eDhPa7DJK5jXGorsnZxonbRIbeAoOUjkUvlp+qxFp9YNuWL0nBqsVCkqUsrHQnuX+Nx5/qcJDI0kWgtJh7ihYCN8aG+13DqOXlbWUfD+fN0AUEmp3RcUWlVEwCynb5ssYLnxHViJT6ULCykb8EnzUfpqBWfVAdcnt5tprGhIe10WnjHpB2FtMPWcpM66yXyOad4Lz4Srq34SHhwZfRos1w9Y/jkzGESvj3dYRLe4zpMwg3XYRJuuA6T4M/Hzfk/OGd9OP2HOE2f8wtBlCebJrkfp+Gc3AGmiSiuaVlpwkmajL4osPUm9FMqIzBOJolfjGuzEtdUwWl53Dm7Eh9pzIdps+FiYJyi1N+Rvs/6OLCQBul8Ip8R08ik3EwhLZz1Wv8XmU7ZZqX7OT2gUIB2oaRBm+2ovDm5nM+ulEeiD8yka8UnJ1PCP82r9YWW8iCU5XO8W/PhPmvllNKW7lEyszsgNKuzkspJFZFL15uPtIweq7A1xiKpz1J8tGXP+dE53/fJmcMk6hcgJO8XqokEKi5uYzTG29LqSev95JqyKsoOOxjNpKQBD7VFc5GBJRsi+NQHkkv6+7m/UxTufwLCCy+CbAruyOLDdwEf/uf6vbbNJukzlogZC6wMdhAcM7ohHPawe/GrcO+HPwe4u782G7sIAE9++0vYv/YKwO6usfCaka0etgwXAGB3D8JznwIYnlmbiW0M92FbQy0d+MmZ3Xo5JDDcvuXJ2ZYqtyUuTwuM6nSXctcufHCOZqkjPScXhbIcdeD0XUpfKyNNy8nlyhuozLkM8XxR6pjm7tc4Fdx620I7lWq10JCm0ZanWoBwm3FsBe1WznpadbTg4A9PI2xx7FUKHopQjg7TKqNnpbioIUcFUGUsy1CS8fFYBYdJuOE6TMIN12ESgyiKiwO1bQOJe1w+6p42Etmhwmi6kLZXfC2G9IUj2vulY2wIPrv4onRhIXcRqS0DiWxkhF0uIb37wG22LRCSuVCyekC2GSXj9CG3YyT+krWh+KPAhkTvgGDKqbqnWbBwY+2Pnm3Wy4aMRYc1MuPDvp0skwgAh8PaJGbh5k4kx0f/hce/ewnw/QenXQCTFJDfQy45PzFNn5NHsoPy/u6gzE+nObzz91P9Z+6kWAm2zg6bDMoq8OQxHN78Axze/htAaB1EbQhhdzyfgRqIGoCxoUIjhDuA3ZDpcR0W4C3nMInbNVw7v4oOAsehArVFPL0uOjMM+DlM+pk7t7/BDuwcJsM6gcM7WweOX05nFCHNi12ASRfLo3QaX9O0GWTylOTnZIMwf4YPPTlD4iMm7aZwAGOUf3Rf48wjHNzVOMkKFA8pp0RHZ1mjdihs5R61PWbsWlphgs/E5gptNvFfSLY8QPk7dVbh+UNg8qfnJsZ8Bo0hzF0Y2Nqvc0s+Vbs5YL5OLfPRcorT2hvjtuxyHWZhzHCX6AMcFtB2B0RvtKZqqe6OEYz1uA7HEbdruN7ZmsZtGq4brXnQhlsbLFkDrY9mC9giH41/dSlONfeEIBcgss7nXopInPdkYN95J3XD1bMgkJUNFOxsDNLgyiynhYyX5dnAhnLyhzmO4V7IO8+xyZEgx5UqvJ41rOUTdhBOr2w6KjZc+B1FBkLGVUoAABQEcmPu6rPPw73v/gh2n/wMANYEhAd4/NqvYf/Wn5pEyPW2IUrOzQWSHyHdkEJgN8D97/0Edp/7GgDu9fnDDvD9t+HRqy8BPvxQ9i6xEXUEuPcMDF//Puw+/aVqDewfvA77f/zx9M40e7jNeNw5CDu4++K34e4r36kWcXj3TYDfvwz8D79ml1clDPuxx9FhuUik0rblVihFWLX+7ZFEXE2ioLBNg9fUSRopVsOjJbioskZlDuyAvmflpOWsOUNu/cBQ8jW/1A0np11RG+GjwG36cQHqFWnBcG4Axgx37d/I1uXXcvCnx6BXoQXf3mOAzvVpooJzaOcWdKBH1fZ07dCsFZpNgmfZbaOJ2dxnpwkNFC3C9MBcGxo0OugxwV8LWKm5lg9sFQdszKGhLAla2dCuduuOZcypx+UXdk0OK5e/hXKNTc4cjiPGhtvTX1njI6Z2+vbuKtaKspLooXdkXs1u5yUR7/LdROMsraSSIfTa6pqWodE9Mvla6sCI8d7uUMEXIEzjdg3XYR
r2osOePIbDR+9BGO7re78QAD/+AODwpK5sBDg6dGyGAtL1sYnLGDe3+2BNTNycYQf7B2/Aw5d/XB9HejjA4YN3jgHUNQ132MOTv/wG9v98A+CgFBCO/+FH/wJ89PBaSY1OULZzQyQL2skayVwg/7Dk3Ky2IlcEgEcfw/7dt+YJnRP1f9jDoz+/AvM0FU4c1u8mes59e+ZXDhXmPE+tForD+lH73Q6EluiozfaldnzWQUWQzdprPk87lg44nkTKN+DT/10S7lW4VYz8wWucOTAPtl5e4mgfjmu0/b3HdZiEG67DJNxwbxlGhwkAuZeXAJS3Qpfemq7dds1tS5dsbc6dAyQpS5uGe+lKrJLSGUqlCb2GcwUuCxBzt71T2/g7t9mQniofv0yjWOtMYdSLM6Sy0pd5iLdFSQtUyiJtRnjmGOdhqq5bo5WzUXAYzns2Lu2tjaqb0WaTHRBrR9cvEVG4VF3WkLsGnzXqohzjbk3dt4hG/jDDxy8BLL5y5miBZi1wa9vT14dJ0o2qft6/1GhQZ1SV9uJxd3cQ7j+XD7RJ40JK38/XAPKz4ly+OG+KwOTDwn0uDSKEZ58/vgH+hmHLcA97uPvCN+G5H/wMoCaQ/KkAAtzdg/DCZ9cmsipsGS4ce5u7z38DYHhmbTL2YfjBH28DOM80s+MoxllVvfkwKudSbiL0dB0NTya2iGpNYmIzl+/EdexjQ8PEGE4FhdPHMAlbLhcsdWaPnfDEAxQJnbx53TEPJ51j3N7CrEfbSNt+arzXt57X2RBx94LsUGHOGRQtF7Fa8HFQQOabJmc5XQ8b8iAbh0mYNFzvdefD+nRhyPowqWitc2VbRyutGCF18+ilU2mEXWX51zFuKbqlZ/RLy0gixzagiS6sgL2hghuwAywarsMBxgzXO9u2sBzZWHwHRLwrQ5rWYQBIfuwCKnZJEpvEYSg9dRoncnejtdxFbBRLqFQzr5fSudH3nDmOaH26yHIwNcZ1NIZNmwWArYU1Fg8HDLB/7wH879VfAey2Rd0a9g/+2ubUyZUOdAz//umXjT136GPd2cDNnM9bC4Pd1gbOx3WsDh/jOkzCDddhEpcjmKiFhvGLQwDitJNrYTz05H7MS+N56hiq0mbYCfeIj2STb2s+cSJEOrguJ4fScaneOW7kOWZJm4VCmaPFg8wKgcSGuLpzR49Rerm8vIRaaECgvyB1Tbl9qOZoMiykHeVhVoZKwW9N+CSJuPwsH4YY12aTa5TxYyZPpsxSDG/Rhgp1lyxUnK/7UMFhEm64DpNIlnzTAdXcsJml8rdO1yt/K+R45EJUluS9zHaWITuQJb9rsVT+HvuKe+RvhdIIcE3ey4Rj+VDBYRJuuA6TcMN1mMT15SWMZ5h10Oc86+dr50s14QWch7rEh5PHef+psgsyqB0iI2e+hE+pDlpvvkQ/uVUMDfdSnTq12TA58injFUdOMPB5AeiALtHcUrstXrqSINnaoVjxyE5ra1ZipHMsTV2kMiQ8NDw7tdmqQ4WtzNEd9uBjXIdJuOE6TMLoy0sct46KHndNS6d2pW5tp+rW+Jw5rVl2qpP5Oqrcnr52w9RMgbfA8db5tAsp8DGuwyTaGW6DB7ppn9CCzxKnvKz9Kz7j/prUi0cwqQLQDBtvrp5uvMc/Wf00oFAT5FjscbcwMloCt1LPWvTUT41sH+M6TMIN12ESw3UPd8gPtrh7JeTyXvZGn0KD0jSlMms5Sfhw92vkUvXT5tPWt3WbSfjMsSFl3ujlJdy+4xkjnFze+PWrNWXWclqaT6t82vq2bjMJnzk2pMzrQwWHSbjhOkzCDdchxpZchpezwySQvHhiyVMLevPRctXwqeWmfcv5GaVTGKRy557YIHnhpETeoCl05grhbPlL89HK1vCp5darvZbgo+XEwYcKDpNww3WYxC6/U5PY5oun66MzPHH8L05PpqHKghn+TpjyictkZQLPh4u6yeknvXeWU+JD6TDHJ/cbn93Bi8nnDKdJm8EG2+zIZwBudlbjUOYOpj1frClPwyf3OZuXuaEx3lgWZixKxIfZ911rvJO65PRFVmZjbYY+VHDYhBuuwyTccB0mcdkB0cr5z70pW/pm7Bo+LesgqUsrPjVye9WXkqld8FiizRCi6LBWjmTRPGGG/JZ5ejvoa1ai1qwvlWarbeZDBYdJuOE6TKKP4W7xJdFb4+R8ZvH5P852gxhpwOZ9AAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDIwLTA4LTIzVDE0OjUyOjAwKzAyOjAwetRgVgAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyMC0wOC0yM1QxNDo1MTo1OCswMjowMJuxI+oAAAAASUVORK5CYII= # yamllint disable-line rule:line-length diff --git a/shuffle-tools/1.2.0/src/app.py b/shuffle-tools/1.2.0/src/app.py index 0a2c48a9..0dba5d9b 100644 --- a/shuffle-tools/1.2.0/src/app.py +++ b/shuffle-tools/1.2.0/src/app.py @@ -2149,6 +2149,55 @@ def delete_cache_value(self, key, category=""): def get_datastore_value(self, key, category=""): return self.get_cache_value(key, category=category) + def get_ioc(self, ioc, data_type=""): + if len(data_type) == 0: + ioc_types = ["domains", "urls", "email_addresses", "ipv4s", "ipv6s", "ipv4_cidrs", "md5s", "sha256s", "sha1s", "cves"] + + iocs = find_iocs(str(input_string)) + for key, value in iocs.items(): + for item in value: + if item.lower() == ioc.lower(): + print("[DEBUG] Found IOC %s in type %s" % (ioc, key)) + data_type = key[:-1] + break + + if len(data_type) > 0: + break + + org_id = self.full_execution["workflow"]["execution_org"]["id"] + url = "%s/api/v1/orgs/%s/get_cache" % (self.url, org_id) + data = { + "workflow_id": self.full_execution["workflow"]["id"], + "execution_id": self.current_execution_id, + "authorization": self.authorization, + "org_id": org_id, + "key": str(key), + "category": "ioc_%s" % 
data_type.replace(" ", "_").lower(), + } + + value = requests.post(url, json=data, verify=False) + try: + allvalues = value.json() + allvalues["key"] = key + + if allvalues["success"] == True and len(allvalues["value"]) > 0: + allvalues["found"] = True + else: + allvalues["success"] = True + allvalues["found"] = False + + try: + parsedvalue = json.loads(allvalues["value"]) + allvalues["value"] = parsedvalue + + except: + pass + + return json.dumps(allvalues) + except: + self.logger.info("Value couldn't be parsed, or json dump of value failed") + return value.text + def get_cache_value(self, key, category=""): org_id = self.full_execution["workflow"]["execution_org"]["id"] url = "%s/api/v1/orgs/%s/get_cache" % (self.url, org_id) @@ -2848,7 +2897,6 @@ def parse_ioc(self, input_string, input_type="all"): print("Invalid key: %s" % key) continue - print(key, value) if len(value) == 0: continue From e0fb386373906cff541b13893caa4f587c14f251 Mon Sep 17 00:00:00 2001 From: Frikky Date: Thu, 18 Dec 2025 15:58:31 +0100 Subject: [PATCH 07/15] Minor fix --- shuffle-tools/1.2.0/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shuffle-tools/1.2.0/src/app.py b/shuffle-tools/1.2.0/src/app.py index 0dba5d9b..85275cf2 100644 --- a/shuffle-tools/1.2.0/src/app.py +++ b/shuffle-tools/1.2.0/src/app.py @@ -2153,7 +2153,7 @@ def get_ioc(self, ioc, data_type=""): if len(data_type) == 0: ioc_types = ["domains", "urls", "email_addresses", "ipv4s", "ipv6s", "ipv4_cidrs", "md5s", "sha256s", "sha1s", "cves"] - iocs = find_iocs(str(input_string)) + iocs = find_iocs(str(ioc)) for key, value in iocs.items(): for item in value: if item.lower() == ioc.lower(): From 2f7c27e2b3530fb39609656e5061f1e1a719ded1 Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Mon, 19 Jan 2026 16:54:46 +0530 Subject: [PATCH 08/15] fix:unsupported hash type MD4 --- active-directory/1.0.0/requirements.txt | 1 + active-directory/1.0.0/src/app.py | 219 +++++++++++++++++------- 2 files changed, 162 insertions(+), 58 deletions(-) diff --git a/active-directory/1.0.0/requirements.txt b/active-directory/1.0.0/requirements.txt index 0d9f9d76..ad59907e 100644 --- a/active-directory/1.0.0/requirements.txt +++ b/active-directory/1.0.0/requirements.txt @@ -1,2 +1,3 @@ shuffle-sdk ldap3==2.9.1 +pycryptodome diff --git a/active-directory/1.0.0/src/app.py b/active-directory/1.0.0/src/app.py index 8b5e1245..67f8ed2c 100644 --- a/active-directory/1.0.0/src/app.py +++ b/active-directory/1.0.0/src/app.py @@ -1,19 +1,24 @@ import json +import hashlib import ldap3 import asyncio -from ldap3 import ( - Server, - Connection, - MODIFY_REPLACE, - ALL_ATTRIBUTES, - NTLM -) +from ldap3 import Server, Connection, MODIFY_REPLACE, ALL_ATTRIBUTES, NTLM + +try: + from Crypto.Hash import MD4 as CryptoMD4 +except ImportError: + CryptoMD4 = None -from ldap3.extend.microsoft.addMembersToGroups import ad_add_members_to_groups as addUsersInGroups -from ldap3.extend.microsoft.removeMembersFromGroups import ad_remove_members_from_groups as removeUsersFromGroups +from ldap3.extend.microsoft.addMembersToGroups import ( + ad_add_members_to_groups as addUsersInGroups, +) +from ldap3.extend.microsoft.removeMembersFromGroups import ( + ad_remove_members_from_groups as removeUsersFromGroups, +) from shuffle_sdk import AppBase + class ActiveDirectory(AppBase): __version__ = "1.0.1" app_name = "Active Directory" # this needs to match "name" in api.yaml @@ -28,11 +33,28 @@ def __init__(self, redis, logger, console_logger=None): super().__init__(redis, logger, 
console_logger) def __ldap_connection(self, server, port, domain, login_user, password, use_ssl): - use_SSL = False if use_ssl.lower() == "false" else True + use_SSL = False if use_ssl.lower() == "false" else True login_dn = domain + "\\" + login_user s = Server(server, port=int(port), use_ssl=use_SSL) - c = Connection(s, user=login_dn, password=password, authentication=NTLM, auto_bind=True) + + if CryptoMD4 and not getattr(hashlib, "__active_directory_md4_patch__", False): + try: + import ldap3.utils.ntlm as ldap3_ntlm + + def _md4_hash(data): + md4 = CryptoMD4.new() + md4.update(data) + return md4.digest() + + ldap3_ntlm.hashlib.md4 = _md4_hash + hashlib.__active_directory_md4_patch__ = True + except Exception: + pass + + c = Connection( + s, user=login_dn, password=password, authentication=NTLM, auto_bind=True + ) return c # Decode UserAccountControl code @@ -137,21 +159,28 @@ def user_attributes( result = json.loads(c.response_to_json()) if len(result["entries"]) == 0: - return json.dumps({ - "success": False, - "result": result, - "reason": "No user found for %s" % samaccountname, - }) + return json.dumps( + { + "success": False, + "result": result, + "reason": "No user found for %s" % samaccountname, + } + ) except Exception as e: - return json.dumps({ - "success": False, - "reason": "Failed to get users in user attributes: %s" % e, - }) - + return json.dumps( + { + "success": False, + "reason": "Failed to get users in user attributes: %s" % e, + } + ) result = result["entries"][0] - result["attributes"]["userAccountControl"] = self.__getUserAccountControlAttributes(result["attributes"]["userAccountControl"]) + result["attributes"]["userAccountControl"] = ( + self.__getUserAccountControlAttributes( + result["attributes"]["userAccountControl"] + ) + ) return json.dumps(result) @@ -180,7 +209,19 @@ def set_password( server, port, domain, login_user, password, use_ssl ) - result = json.loads( self.user_attributes( server, port, domain, login_user, password, base_dn, use_ssl, samaccountname, search_base,)) + result = json.loads( + self.user_attributes( + server, + port, + domain, + login_user, + password, + base_dn, + use_ssl, + samaccountname, + search_base, + ) + ) user_dn = result["dn"] c.extend.microsoft.modify_password(user_dn, new_password) @@ -243,7 +284,6 @@ def enable_user( samaccountname, search_base, ): - if search_base: base_dn = search_base @@ -299,7 +339,6 @@ def disable_user( samaccountname, search_base, ): - if search_base: base_dn = search_base @@ -326,7 +365,6 @@ def disable_user( "success": False, "reason": "Failed to get result attributes: %s" % e, } - if "ACCOUNTDISABLED" in userAccountControl: try: @@ -362,8 +400,18 @@ def disable_user( "reason": "Failed adding ACCOUNTDISABLED to user: %s" % e, } - def lock_user(self,server,domain,port,login_user,password,base_dn,use_ssl,samaccountname,search_base): - + def lock_user( + self, + server, + domain, + port, + login_user, + password, + base_dn, + use_ssl, + samaccountname, + search_base, + ): if search_base: base_dn = search_base @@ -372,19 +420,29 @@ def lock_user(self,server,domain,port,login_user,password,base_dn,use_ssl,samacc c.search(base_dn, f"(SAMAccountName={samaccountname})") if len(c.entries) == 0: - return {"success":"false","message":f"User {samaccountname} not found"} + return {"success": "false", "message": f"User {samaccountname} not found"} user_dn = c.entries[0].entry_dn - c.modify(user_dn, {'userAccountControl':[(MODIFY_REPLACE,[514])]}) + c.modify(user_dn, {"userAccountControl": [(MODIFY_REPLACE, 
[514])]}) result = c.result result["success"] = True return result - - def unlock_user(self,server,domain,port,login_user,password,base_dn,use_ssl,samaccountname,search_base): - + + def unlock_user( + self, + server, + domain, + port, + login_user, + password, + base_dn, + use_ssl, + samaccountname, + search_base, + ): if search_base: base_dn = search_base @@ -393,44 +451,72 @@ def unlock_user(self,server,domain,port,login_user,password,base_dn,use_ssl,sama c.search(base_dn, f"(SAMAccountName={samaccountname})") if len(c.entries) == 0: - return {"success":"false","message":f"User {samaccountname} not found"} + return {"success": "false", "message": f"User {samaccountname} not found"} user_dn = c.entries[0].entry_dn - c.modify(user_dn, {'userAccountControl':[(MODIFY_REPLACE,[0])]}) + c.modify(user_dn, {"userAccountControl": [(MODIFY_REPLACE, [0])]}) result = c.result result["success"] = True return result - - def change_user_password_at_next_login(self,server,domain,port,login_user,password,base_dn,use_ssl,samaccountname,search_base,new_user_password,repeat_new_user_password): - + + def change_user_password_at_next_login( + self, + server, + domain, + port, + login_user, + password, + base_dn, + use_ssl, + samaccountname, + search_base, + new_user_password, + repeat_new_user_password, + ): if search_base: base_dn = search_base if str(new_user_password) != str(repeat_new_user_password): - return {"success":"false","message":"new_user_password and repeat_new_user_password does not match."} + return { + "success": "false", + "message": "new_user_password and repeat_new_user_password does not match.", + } c = self.__ldap_connection(server, port, domain, login_user, password, use_ssl) c.search(base_dn, f"(SAMAccountName={samaccountname})") if len(c.entries) == 0: - return {"success":"false","message":f"User {samaccountname} not found"} + return {"success": "false", "message": f"User {samaccountname} not found"} user_dn = c.entries[0].entry_dn - c.modify(user_dn, {'pwdLastSet':(MODIFY_REPLACE, [0])}) - c.extend.microsoft.modify_password(user_dn, new_user_password.encode('utf-16-le')) + c.modify(user_dn, {"pwdLastSet": (MODIFY_REPLACE, [0])}) + c.extend.microsoft.modify_password( + user_dn, new_user_password.encode("utf-16-le") + ) result = c.result result["success"] = True return result - def add_user_to_group(self, server, domain, port, login_user, password, base_dn, use_ssl, samaccountname, search_base, group_name): - + def add_user_to_group( + self, + server, + domain, + port, + login_user, + password, + base_dn, + use_ssl, + samaccountname, + search_base, + group_name, + ): if search_base: base_dn = search_base @@ -438,24 +524,38 @@ def add_user_to_group(self, server, domain, port, login_user, password, base_dn, c.search(base_dn, f"(SAMAccountName={samaccountname})") if len(c.entries) == 0: - return {"success":"false","message":f"User {samaccountname} not found"} + return {"success": "false", "message": f"User {samaccountname} not found"} user_dn = c.entries[0].entry_dn - search_filter = f'(&(objectClass=group)(cn={group_name}))' + search_filter = f"(&(objectClass=group)(cn={group_name}))" c.search(base_dn, search_filter, attributes=["distinguishedName"]) if len(c.entries) == 0: - return {"success":"false","message":f"Group {group_name} not found"} + return {"success": "false", "message": f"Group {group_name} not found"} group_dn = c.entries[0]["distinguishedName"] print(group_dn) - res = addUsersInGroups(c, user_dn, str(group_dn),fix=True) + res = addUsersInGroups(c, user_dn, str(group_dn), 
fix=True) if res == True: - return {"success":"true","message":f"User {samaccountname} was added to group {group_name}"} + return { + "success": "true", + "message": f"User {samaccountname} was added to group {group_name}", + } else: - return {"success":"false","message":f"Could not add user to group"} + return {"success": "false", "message": f"Could not add user to group"} - def remove_user_from_group(self, server, domain, port, login_user, password, base_dn, use_ssl, samaccountname, search_base, group_name): - + def remove_user_from_group( + self, + server, + domain, + port, + login_user, + password, + base_dn, + use_ssl, + samaccountname, + search_base, + group_name, + ): if search_base: base_dn = search_base @@ -463,21 +563,24 @@ def remove_user_from_group(self, server, domain, port, login_user, password, bas c.search(base_dn, f"(SAMAccountName={samaccountname})") if len(c.entries) == 0: - return {"success":"false","message":f"User {samaccountname} not found"} + return {"success": "false", "message": f"User {samaccountname} not found"} user_dn = c.entries[0].entry_dn - search_filter = f'(&(objectClass=group)(cn={group_name}))' + search_filter = f"(&(objectClass=group)(cn={group_name}))" c.search(base_dn, search_filter, attributes=["distinguishedName"]) if len(c.entries) == 0: - return {"success":"false","message":f"Group {group_name} not found"} + return {"success": "false", "message": f"Group {group_name} not found"} group_dn = c.entries[0]["distinguishedName"] print(group_dn) - res = removeUsersFromGroups(c, user_dn, str(group_dn),fix=True) + res = removeUsersFromGroups(c, user_dn, str(group_dn), fix=True) if res == True: - return {"success":"true","message":f"User {samaccountname} was removed from group {group_name}"} + return { + "success": "true", + "message": f"User {samaccountname} was removed from group {group_name}", + } else: - return {"success":"false","message":f"Could not remove user to group"} + return {"success": "false", "message": f"Could not remove user to group"} if __name__ == "__main__": From 122793913b991ab59fb532c5a98a98c34d427aae Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Sun, 25 Jan 2026 19:28:18 +0530 Subject: [PATCH 09/15] added AGENTS.md for agent context --- AGENTS.md | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..271c0fc2 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,76 @@ +# Repository Guidelines + +## Project Structure & Module Organization +This repository hosts Shuffle app implementations. Each app lives in a top-level folder named after the integration (e.g., `aws-ec2/`), and each release is versioned under a subfolder like `1.0.0/`. A typical app version contains: + +- `src/app.py`: the Shuffle SDK entry point. +- `api.yaml`: OpenAPI definition used by Shuffle. +- `requirements.txt`: Python dependencies for the app. +- `Dockerfile`: container build instructions for the app. +- `README.md`: app-specific usage and action documentation. +- Optional assets such as screenshots (`*.png`). + +In `api.yaml`, prefer an `authentication` block for shared credentials (URL, tokens, keys). Actions should only include auth parameters when they truly differ per call. + +## Build, Test, and Development Commands +Apps are built and run container-first via the Shuffle SDK image. From an app version directory: + +- `docker build -t shuffle-: .`: build the app image. 
+- `docker run --rm shuffle-:`: run the app container locally. + +For quick iteration on code, you can also run the Python entrypoint in a virtualenv: + +- `pip install -r requirements.txt` +- `python src/app.py --log-level DEBUG` + +## Coding Style & Naming Conventions +Use 4-space indentation and standard Python style. Keep functions `snake_case`, classes `CamelCase`, and constants `UPPER_SNAKE_CASE`. Match existing patterns in `src/app.py` and keep action names aligned with `api.yaml`. + +## Creating New Shuffle Apps (Agent Workflow) +Use an existing app as a template (e.g., `http/1.4.0/` or `aws-ec2/1.0.0/`) and follow the same folder layout. A minimal, working app version should include: + +- `api.yaml`: action definitions, parameters, and examples. +- `src/app.py`: class extending the Shuffle SDK (`shuffle_sdk.AppBase`). +- `requirements.txt`: third-party dependencies. +- `Dockerfile`: built on `frikky/shuffle:app_sdk`. + +When adding actions, ensure the `api.yaml` action name matches the method name in `src/app.py` and parameter names align exactly. Keep input parsing defensive (strings vs JSON), and return JSON-serializable results. For HTTP integrations, centralize auth and base URL handling and add a TLS `verify` option. If a service requires special payloads (e.g., ADF for Jira), accept JSON strings and pass through unchanged. Keep `api.yaml` examples realistic because they show up in the Shuffle UI. + +## Authentication & App Configuration +Most apps declare credentials in `api.yaml` under `authentication:` so Shuffle injects them automatically. In code, read those values as normal action arguments (Shuffle passes them into each action). Prefer a single auth helper in `src/app.py` (e.g., `_auth()` for tokens, `_build_api_base()` for base URLs) and reuse it across actions. If an integration supports multiple auth modes (token vs password), accept both and choose the provided one. + +Prefer small, focused actions (create, update, list, search) and document auth requirements and examples in the app `README.md`. + +## Manual Python App Notes (From Shuffle Docs) +- **SDK image choices:** Shuffle provides Alpine (slim), Kali (security tooling), and Blackarch (kitchen‑sink). This repo’s Dockerfiles typically use `frikky/shuffle:app_sdk` (Alpine‑based) unless a toolset requires otherwise. +- **Directory layout:** `api.yaml`, `Dockerfile`, `requirements.txt`, `README.md`, and `src/app.py` are expected in each app version. Complex apps can add additional modules under `src/` and import them from `app.py`. +- **Actions & wiring:** Every action in `api.yaml` must map to a method in `src/app.py` with the same name and argument list. Authentication parameters are passed into each action automatically when declared under `authentication:`. +- **Utility helpers:** In `AppBase`, you can use `self.get_file`, `self.set_files`, `self.update_file`, and cache helpers `self.get_cache`, `self.set_cache`, `self.delete_cache` for file and key/value workflows. +- **Prototyping:** Build and test your Python logic locally first, then wire it into `src/app.py`. Keep return values JSON‑serializable so Shuffle can consume them. +- **Upload & hotload:** After a prototype works, upload it to Shuffle (cloud) or hotload locally (on‑prem) by rebuilding the app image. Local Docker rebuilds are faster for iteration. + +## Testing, Hotloading, and CI/CD +- **Cloud upload test:** Use the Upload App API to add the app to your org, then run a workflow to validate actions. 
+- **Local hotload (on‑prem):** Place the app folder in `shuffle-apps/`, set `SHUFFLE_APP_HOTLOAD_FOLDER=./shuffle-apps`, then use the hot reload button in the UI. Allow ~20 seconds for the reload to complete. +- **Update workflow deps:** If you update an existing app version, remove and re‑add the app in any workflows that reference it. +- **Fast local iteration:** After the first upload, rebuild locally: `docker images | grep ` then `docker build . -t `. +- **CI/CD pattern:** Create a test workflow, upload a test app version, run the workflow via API, and validate `workflowexecution.workflow.validation.valid` before promoting. + +## Publishing Apps +- **OpenAPI apps:** Upload to your instance, then use the `/apps` page to publish so it appears on `shuffler.io`. +- **Python apps:** Fork `https://github.com/frikky/shuffle-apps`, add your app, and open a pull request to upstream. + +## Testing Guidelines +There is no repository-wide test suite. If you add tests for a specific app, keep them alongside the app version (e.g., `aws-ec2/1.0.0/tests/`) and document how to run them in that app’s `README.md`. + +## Commit & Pull Request Guidelines +Commit messages are short and descriptive, sometimes using a prefix like `fix:`. Follow that style and keep commits scoped to a single app/version when possible. + +For pull requests, include: + +- A clear description of the change and impacted app/version path. +- Updated `README.md` or `api.yaml` when behavior changes. +- Screenshots/assets if user-facing output or UI-related docs are affected. + +## Security & Configuration Tips +Many apps require API keys or credentials. Do not commit secrets; use environment variables or Shuffle configuration fields instead, and document required inputs in the app’s `README.md`. From 1bc69724965a0df668cacdd414564c628d79389d Mon Sep 17 00:00:00 2001 From: yashsinghcodes Date: Sat, 7 Feb 2026 14:00:11 +0530 Subject: [PATCH 10/15] option to not return cache values --- shuffle-tools/1.2.0/api.yaml | 26 + shuffle-tools/1.2.0/src/app.py | 884 +++++++++++++++++++-------------- 2 files changed, 548 insertions(+), 362 deletions(-) diff --git a/shuffle-tools/1.2.0/api.yaml b/shuffle-tools/1.2.0/api.yaml index ebbf7c3e..ab5337ee 100644 --- a/shuffle-tools/1.2.0/api.yaml +++ b/shuffle-tools/1.2.0/api.yaml @@ -1279,6 +1279,20 @@ actions: example: "tickets" schema: type: string + - name: return_values + description: Whether to include the cache values in the response + required: false + options: + - true + - false + multiline: false + example: "false" + value: "true" + schema: + type: string + returns: + schema: + type: string - name: get_cache_value description: Get a value saved to your organization in Shuffle. Deprecated for "get_datastore_value" parameters: @@ -1300,6 +1314,7 @@ actions: schema: type: string - name: delete_cache_value + description: Delete a value saved to your organization in Shuffle. 
Deprecated for "delete_datastore_value" parameters: - name: key @@ -1381,6 +1396,17 @@ actions: example: "tickets" schema: type: string + - name: return_values + description: Whether to include the cache values in the response + required: false + options: + - true + - false + multiline: false + example: "false" + value: "true" + schema: + type: string #- name: parse_ioc_new # description: Parse IOC's based on https://github.com/fhightower/ioc-finder # parameters: diff --git a/shuffle-tools/1.2.0/src/app.py b/shuffle-tools/1.2.0/src/app.py index 85275cf2..e1bbdac3 100644 --- a/shuffle-tools/1.2.0/src/app.py +++ b/shuffle-tools/1.2.0/src/app.py @@ -41,15 +41,16 @@ import concurrent.futures import multiprocessing -#from walkoff_app_sdk.app_base import AppBase +# from walkoff_app_sdk.app_base import AppBase from shuffle_sdk import AppBase -# Override exit(), sys.exit, and os._exit +# Override exit(), sys.exit, and os._exit # sys.exit() can be caught, meaning we can have a custom handler for it builtins.exit = sys.exit os.exit = sys.exit os._exit = sys.exit + class Tools(AppBase): __version__ = "1.2.0" app_name = ( @@ -86,7 +87,7 @@ def base64_conversion(self, string, operation): # Decode the base64 into an image and upload it as a file decoded_bytes = base64.b64decode(string) - # Make the bytes into unicode escaped bytes + # Make the bytes into unicode escaped bytes # UnicodeDecodeError - 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte try: decoded_bytes = str(decoded_bytes, "utf-8") @@ -96,7 +97,7 @@ def base64_conversion(self, string, operation): filename = "base64_image.png" file = { "filename": filename, - "data": decoded_bytes, + "data": decoded_bytes, } fileret = self.set_files([file]) @@ -107,7 +108,6 @@ def base64_conversion(self, string, operation): return value elif operation == "decode": - if "-" in string: string = string.replace("-", "+", -1) @@ -118,18 +118,19 @@ def base64_conversion(self, string, operation): if len(string) % 4 != 0: string += "=" * (4 - len(string) % 4) - # For loop this. It's stupid. 
- decoded_bytes = "" + decoded_bytes = "" try: decoded_bytes = base64.b64decode(string) except Exception as e: - return json.dumps({ - "success": False, - "reason": "Invalid Base64 - %s" % e, - }) + return json.dumps( + { + "success": False, + "reason": "Invalid Base64 - %s" % e, + } + ) - #if "incorrect padding" in str(e).lower(): + # if "incorrect padding" in str(e).lower(): # try: # decoded_bytes = base64.b64decode(string + "=") # except Exception as e: @@ -144,7 +145,6 @@ def base64_conversion(self, string, operation): # if "incorrect padding" in str(e).lower(): # return "Invalid Base64" - try: decoded_bytes = str(decoded_bytes, "utf-8") except: @@ -199,7 +199,6 @@ def send_sms_shuffle(self, apikey, phone_numbers, body): def send_email_shuffle(self, apikey, recipients, subject, body, attachments=""): recipients = self.parse_list_internal(recipients) - targets = [recipients] if ", " in recipients: targets = recipients.split(", ") @@ -207,9 +206,9 @@ def send_email_shuffle(self, apikey, recipients, subject, body, attachments=""): targets = recipients.split(",") data = { - "targets": targets, - "subject": subject, - "body": body, + "targets": targets, + "subject": subject, + "body": body, "type": "alert", "email_app": True, } @@ -222,11 +221,10 @@ def send_email_shuffle(self, apikey, recipients, subject, body, attachments=""): for item in attachments: new_file = self.get_file(file_ids) files.append(new_file) - + data["attachments"] = files except Exception as e: pass - url = "https://shuffler.io/functions/sendmail" headers = {"Authorization": "Bearer %s" % apikey} @@ -249,7 +247,8 @@ def dedup_and_merge(self, key, value, timeout, set_skipped=True): response = { "success": False, "datastore_key": cachekey, - "info": "All keys from the last %d seconds with the key '%s' have been merged. The result was set to SKIPPED in all other actions." % (timeout, key), + "info": "All keys from the last %d seconds with the key '%s' have been merged. The result was set to SKIPPED in all other actions." 
+ % (timeout, key), "timeout": timeout, "original_value": value, "all_values": [], @@ -272,7 +271,7 @@ def dedup_and_merge(self, key, value, timeout, set_skipped=True): found_cache["value"].append(value) if "created" in found_cache: if found_cache["created"] + timeout + 3 < time.time(): - set_skipped = False + set_skipped = False response["success"] = True response["all_values"] = found_cache["value"] @@ -280,24 +279,37 @@ def dedup_and_merge(self, key, value, timeout, set_skipped=True): return json.dumps(response) else: - self.logger.info("Dedup-key is already handled in another workflow with timeout %d" % timeout) + self.logger.info( + "Dedup-key is already handled in another workflow with timeout %d" + % timeout + ) self.set_cache(cachekey, json.dumps(found_cache["value"])) if set_skipped == True: self.action_result["status"] = "SKIPPED" - self.action_result["result"] = json.dumps({ - "status": False, - "reason": "Dedup-key is already handled in another workflow with timeout %d" % timeout, - }) + self.action_result["result"] = json.dumps( + { + "status": False, + "reason": "Dedup-key is already handled in another workflow with timeout %d" + % timeout, + } + ) - self.send_result(self.action_result, {"Authorization": "Bearer %s" % self.authorization}, "/api/v1/streams") + self.send_result( + self.action_result, + {"Authorization": "Bearer %s" % self.authorization}, + "/api/v1/streams", + ) return found_cache parsedvalue = [value] resp = self.set_cache(cachekey, json.dumps(parsedvalue)) - self.logger.info("Sleeping for %d seconds while waiting for cache to fill up elsewhere" % timeout) + self.logger.info( + "Sleeping for %d seconds while waiting for cache to fill up elsewhere" + % timeout + ) time.sleep(timeout) found_cache = self.get_cache(cachekey) @@ -307,7 +319,6 @@ def dedup_and_merge(self, key, value, timeout, set_skipped=True): self.delete_cache(cachekey) return json.dumps(response) - # https://github.com/fhightower/ioc-finder def parse_file_ioc(self, file_ids, input_type="all"): def parse(data): @@ -326,7 +337,8 @@ def parse(data): for subitem in subvalue: data = { "data": subitem, - "data_type": "%s_%s" % (key[:-1], subkey), + "data_type": "%s_%s" + % (key[:-1], subkey), } if data not in newarray: newarray.append(data) @@ -405,10 +417,7 @@ def set_json_key(self, json_object, key, value): try: json_object = json.loads(json_object) except json.decoder.JSONDecodeError as e: - return { - "success": False, - "reason": "Item is not valid JSON" - } + return {"success": False, "reason": "Item is not valid JSON"} if isinstance(json_object, list): if len(json_object) == 1: @@ -416,16 +425,15 @@ def set_json_key(self, json_object, key, value): else: return { "success": False, - "reason": "Item is valid JSON, but can't handle lists. Use .#" + "reason": "Item is valid JSON, but can't handle lists. Use .#", } - #if not isinstance(json_object, object): + # if not isinstance(json_object, object): # return { # "success": False, # "reason": "Item is not valid JSON (2)" # } - if isinstance(value, str): try: value = json.loads(value) @@ -435,7 +443,7 @@ def set_json_key(self, json_object, key, value): # Handle JSON paths if "." in key: base_object = json.loads(json.dumps(json_object)) - #base_object.output.recipients.notificationEndpointIds = ... + # base_object.output.recipients.notificationEndpointIds = ... 
keys = key.split(".") if len(keys) >= 1: @@ -444,14 +452,14 @@ def set_json_key(self, json_object, key, value): # This is awful :) buildstring = "base_object" for subkey in keys: - buildstring += f"[\"{subkey}\"]" + buildstring += f'["{subkey}"]' buildstring += f" = {value}" - #output = + # output = exec(buildstring) json_object = base_object - #json_object[first_object] = base_object + # json_object[first_object] = base_object else: json_object[key] = value @@ -520,9 +528,9 @@ def replace_value_from_dictionary(self, input_data, mapping, default_value=""): except: self.logger.info(f"Failed mapping output data for key {key}") - return input_data + return input_data - # Changed with 1.1.0 to run with different returns + # Changed with 1.1.0 to run with different returns def regex_capture_group(self, input_data, regex): try: returnvalues = { @@ -533,8 +541,8 @@ def regex_capture_group(self, input_data, regex): found = False for item in matches: if isinstance(item, str): - found = True - name = "group_0" + found = True + name = "group_0" try: returnvalues[name].append(item) except: @@ -542,7 +550,7 @@ def regex_capture_group(self, input_data, regex): else: for i in range(0, len(item)): - found = True + found = True name = "group_%d" % i try: returnvalues[name].append(item[i]) @@ -558,10 +566,7 @@ def regex_capture_group(self, input_data, regex): "reason": "Bad regex pattern: %s" % e, } - def regex_replace( - self, input_data, regex, replace_string="", ignore_case="False" - ): - + def regex_replace(self, input_data, regex, replace_string="", ignore_case="False"): if ignore_case.lower().strip() == "true": return re.sub(regex, replace_string, input_data, flags=re.IGNORECASE) else: @@ -587,10 +592,11 @@ def execute_python(self, code): # 2. Subprocess execute file? try: f = StringIO() + def custom_print(*args, **kwargs): return print(*args, file=f, **kwargs) - - #with redirect_stdout(f): # just in case + + # with redirect_stdout(f): # just in case # Add globals in it too globals_copy = globals().copy() globals_copy["print"] = custom_print @@ -600,7 +606,6 @@ def custom_print(*args, **kwargs): except Exception as e: self.logger.info(f"Failed to add singul to python globals: {e}") - # Add self to globals_copy for key, value in locals().copy().items(): if key not in globals_copy: @@ -614,7 +619,7 @@ def custom_print(*args, **kwargs): # Same as a return pass except SyntaxError as e: - # Special handler for return usage. Makes return act as + # Special handler for return usage. 
Makes return act as # an exit() if "'return' outside function" in str(e): return { @@ -631,11 +636,11 @@ def custom_print(*args, **kwargs): # Reason: SyntaxError makes it crash BEFORE it reaches the return s = f.getvalue() - f.close() # why: https://www.youtube.com/watch?v=6SA6S9Ca5-U + f.close() # why: https://www.youtube.com/watch?v=6SA6S9Ca5-U - #try: + # try: # s = s.encode("utf-8") - #except Exception as e: + # except Exception as e: try: return { @@ -653,7 +658,7 @@ def custom_print(*args, **kwargs): "success": True, "message": s, } - + except Exception as e: return { "success": False, @@ -726,7 +731,6 @@ def preload_cache(self, key): response_data["value"] = parsed return get_response.json() - def update_cache(self, key): org_id = self.full_execution["workflow"]["execution_org"]["id"] url = f"{self.url}/api/v1/orgs/{org_id}/set_cache" @@ -743,9 +747,7 @@ def update_cache(self, key): self.cache_update_buffer = [] return get_response.json() - def filter_list(self, input_list, field, check, value, opposite): - # Remove hashtags on the fly # E.g. #.fieldname or .#.fieldname @@ -754,20 +756,22 @@ def filter_list(self, input_list, field, check, value, opposite): flip = True try: - #input_list = eval(input_list) # nosec + # input_list = eval(input_list) # nosec input_list = json.loads(input_list) except Exception: try: input_list = input_list.replace("'", '"', -1) input_list = json.loads(input_list) except Exception: - self.logger.info("[WARNING] Error parsing string to array. Continuing anyway.") + self.logger.info( + "[WARNING] Error parsing string to array. Continuing anyway." + ) # Workaround D: if not isinstance(input_list, list): return { "success": False, - "reason": "Error: input isnt a list. Please use conditions instead if using JSON.", + "reason": "Error: input isnt a list. 
Please use conditions instead if using JSON.", "valid": [], "invalid": [], } @@ -799,7 +803,6 @@ def filter_list(self, input_list, field, check, value, opposite): except json.decoder.JSONDecodeError as e: pass - # EQUALS JUST FOR STR if check == "equals": # Mostly for bools @@ -817,7 +820,7 @@ def filter_list(self, input_list, field, check, value, opposite): for subcheck in checklist: subcheck = str(subcheck).strip() - #ext.lower().strip() == value.lower().strip() + # ext.lower().strip() == value.lower().strip() if type(tmp) == list and subcheck in tmp: new_list.append(item) found = True @@ -874,8 +877,10 @@ def filter_list(self, input_list, field, check, value, opposite): # CONTAINS FIND FOR LIST AND IN FOR STR elif check == "contains": - #if str(value).lower() in str(tmp).lower(): - if str(value).lower() in str(tmp).lower() or self.check_wildcard(value, tmp): + # if str(value).lower() in str(tmp).lower(): + if str(value).lower() in str(tmp).lower() or self.check_wildcard( + value, tmp + ): new_list.append(item) else: failed_list.append(item) @@ -885,7 +890,9 @@ def filter_list(self, input_list, field, check, value, opposite): checklist = value.split(",") found = False for checker in checklist: - if str(checker).lower() in str(tmp).lower() or self.check_wildcard(checker, tmp): + if str(checker).lower() in str( + tmp + ).lower() or self.check_wildcard(checker, tmp): new_list.append(item) found = True break @@ -929,9 +936,9 @@ def filter_list(self, input_list, field, check, value, opposite): failed_list.append(item) elif check == "less than": # Old - #if int(tmp) < int(value): + # if int(tmp) < int(value): # new_list.append(item) - #else: + # else: # failed_list.append(item) list_set = False @@ -970,16 +977,17 @@ def filter_list(self, input_list, field, check, value, opposite): else: failed_list.append(item) - if len(self.cache_update_buffer) > 400 or (item == input_list[-1] and len(self.cache_update_buffer) > 0): + if len(self.cache_update_buffer) > 400 or ( + item == input_list[-1] and len(self.cache_update_buffer) > 0 + ): self.update_cache(value) - - #return { + # return { # "success": True, # "found": False, # "key": key, # "value": new_value, - #} + # } # SINGLE ITEM COULD BE A FILE OR A LIST OF FILES elif check == "files by extension": @@ -989,7 +997,7 @@ def filter_list(self, input_list, field, check, value, opposite): for file_id in tmp: filedata = self.get_file(file_id) _, ext = os.path.splitext(filedata["filename"]) - if (ext.lower().strip() == value.lower().strip()): + if ext.lower().strip() == value.lower().strip(): file_list.append(file_id) # else: # failed_list.append(file_id) @@ -1023,11 +1031,11 @@ def filter_list(self, input_list, field, check, value, opposite): failed_list = tmplist try: - data ={ - "success": True, - "valid": new_list, - "invalid": failed_list, - } + data = { + "success": True, + "valid": new_list, + "invalid": failed_list, + } return json.dumps(data) # new_list = json.dumps(new_list) @@ -1041,7 +1049,7 @@ def filter_list(self, input_list, field, check, value, opposite): return new_list - #def multi_list_filter(self, input_list, field, check, value): + # def multi_list_filter(self, input_list, field, check, value): # input_list = input_list.replace("'", '"', -1) # input_list = json.loads(input_list) @@ -1129,13 +1137,13 @@ def create_file(self, filename, data, category=""): try: if str(data).startswith("b'") and str(data).endswith("'"): data = data[2:-1] - if str(data).startswith("\"") and str(data).endswith("\""): + if str(data).startswith('"') and 
str(data).endswith('"'): data = data[2:-1] except Exception as e: self.logger.info(f"Exception: {e}") try: - #if not isinstance(data, str) and not isinstance(data, int) and not isinstance(float) and not isinstance(data, bool): + # if not isinstance(data, str) and not isinstance(data, int) and not isinstance(float) and not isinstance(data, bool): if isinstance(data, dict) or isinstance(data, list): data = json.dumps(data) except: @@ -1152,13 +1160,13 @@ def create_file(self, filename, data, category=""): if len(fileret) == 1: value = {"success": True, "filename": filename, "file_id": fileret[0]} - return value + return value - # Input is WAS a file, hence it didn't get the files + # Input is WAS a file, hence it didn't get the files def list_file_category_ids(self, file_category): return self.get_file_category_ids(file_category) - # Input is WAS a file, hence it didn't get the files + # Input is WAS a file, hence it didn't get the files # Category doesn't matter as it uses file ID, which is unique anyway def get_file_value(self, filedata, category=""): filedata = self.get_file(filedata, category) @@ -1218,7 +1226,6 @@ def download_remote_file(self, url, custom_filename="", category=""): return value - def extract_archive(self, file_id, fileformat="zip", password=None): try: return_data = {"success": False, "files": []} @@ -1227,7 +1234,6 @@ def extract_archive(self, file_id, fileformat="zip", password=None): return_ids = None with tempfile.TemporaryDirectory() as tmpdirname: - # Get archive and save phisically with open(os.path.join(tmpdirname, "archive"), "wb") as f: f.write(item["data"]) @@ -1237,7 +1243,9 @@ def extract_archive(self, file_id, fileformat="zip", password=None): # Zipfile for zipped archive if fileformat.strip().lower() == "zip": try: - with zipfile.ZipFile(os.path.join(tmpdirname, "archive")) as z_file: + with zipfile.ZipFile( + os.path.join(tmpdirname, "archive") + ) as z_file: if password: z_file.setpassword(bytes(password.encode())) @@ -1248,7 +1256,10 @@ def extract_archive(self, file_id, fileformat="zip", password=None): source = z_file.open(member) to_be_uploaded.append( - {"filename": source.name.split("/")[-1], "data": source.read()} + { + "filename": source.name.split("/")[-1], + "data": source.read(), + } ) return_data["success"] = True @@ -1276,7 +1287,10 @@ def extract_archive(self, file_id, fileformat="zip", password=None): source = z_file.open(member) to_be_uploaded.append( - {"filename": source.name.split("/")[-1], "data": source.read()} + { + "filename": source.name.split("/")[-1], + "data": source.read(), + } ) return_data["success"] = True @@ -1319,7 +1333,9 @@ def extract_archive(self, file_id, fileformat="zip", password=None): ) elif fileformat.strip().lower() == "tar.gz": try: - with tarfile.open(os.path.join(tmpdirname, "archive"), mode="r:gz") as z_file: + with tarfile.open( + os.path.join(tmpdirname, "archive"), mode="r:gz" + ) as z_file: for member in z_file.getnames(): member_files = z_file.extractfile(member) @@ -1444,22 +1460,23 @@ def create_archive(self, file_ids, fileformat, name, password=None): # Create archive temporary with tempfile.NamedTemporaryFile() as archive: - if fileformat == "zip": archive_name = "archive.zip" if not name else name - pwd = password if isinstance(password, (bytes, bytearray)) else password.encode() + pwd = ( + password + if isinstance(password, (bytes, bytearray)) + else password.encode() + ) with pyzipper.AESZipFile( - archive.name, - "w", - compression=pyzipper.ZIP_DEFLATED - ) as zf: - zf.setpassword(pwd) - 
zf.setencryption(pyzipper.WZ_AES, nbits=256) + archive.name, "w", compression=pyzipper.ZIP_DEFLATED + ) as zf: + zf.setpassword(pwd) + zf.setencryption(pyzipper.WZ_AES, nbits=256) - for path in paths: - zf.write(path, arcname=os.path.basename(path)) + for path in paths: + zf.write(path, arcname=os.path.basename(path)) elif fileformat == "7zip": archive_name = "archive.7z" if not name else name @@ -1491,8 +1508,13 @@ def create_archive(self, file_ids, fileformat, name, password=None): return {"success": False, "message": excp} def add_list_to_list(self, list_one, list_two): - if not isinstance(list_one, list) and not isinstance(list_one, dict): - if not list_one or list_one == " " or list_one == "None" or list_one == "null": + if not isinstance(list_one, list) and not isinstance(list_one, dict): + if ( + not list_one + or list_one == " " + or list_one == "None" + or list_one == "null" + ): list_one = "[]" try: @@ -1503,11 +1525,16 @@ def add_list_to_list(self, list_one, list_two): else: return { "success": False, - "reason": f"List one is not a valid list: {list_one}" + "reason": f"List one is not a valid list: {list_one}", } if not isinstance(list_two, list) and not isinstance(list_two, dict): - if not list_two or list_two == " " or list_two == "None" or list_two == "null": + if ( + not list_two + or list_two == " " + or list_two == "None" + or list_two == "null" + ): list_two = "[]" try: @@ -1518,7 +1545,7 @@ def add_list_to_list(self, list_one, list_two): else: return { "success": False, - "reason": f"List two is not a valid list: {list_two}" + "reason": f"List two is not a valid list: {list_two}", } if isinstance(list_one, dict): @@ -1536,19 +1563,13 @@ def diff_lists(self, list_one, list_two): try: list_one = json.loads(list_one) except json.decoder.JSONDecodeError as e: - return { - "success": False, - "reason": "list_one is not a valid list." - } + return {"success": False, "reason": "list_one is not a valid list."} if isinstance(list_two, str): try: list_two = json.loads(list_two) except json.decoder.JSONDecodeError as e: - return { - "success": False, - "reason": "list_two is not a valid list." - } + return {"success": False, "reason": "list_two is not a valid list."} def diff(li1, li2): try: @@ -1557,7 +1578,7 @@ def diff(li1, li2): # Bad json diffing - at least order doesn't matter :) not_found = [] for item in list_one: - #item = sorted(item.items()) + # item = sorted(item.items()) if item in list_two: pass else: @@ -1585,8 +1606,14 @@ def diff(li1, li2): "diff": newdiff, } - - def merge_lists(self, list_one, list_two, set_field="", sort_key_list_one="", sort_key_list_two=""): + def merge_lists( + self, + list_one, + list_two, + set_field="", + sort_key_list_one="", + sort_key_list_two="", + ): if isinstance(list_one, str): try: list_one = json.loads(list_one) @@ -1603,23 +1630,34 @@ def merge_lists(self, list_one, list_two, set_field="", sort_key_list_one="", so if isinstance(list_one, dict) and isinstance(list_two, dict): for key, value in list_two.items(): list_one[key] = value - + return list_one - return {"success": False, "message": "Both input lists need to be valid JSON lists."} + return { + "success": False, + "message": "Both input lists need to be valid JSON lists.", + } if len(list_one) != len(list_two): - return {"success": False, "message": "Lists length must be the same. %d vs %d. Are you trying to add them to a single list? Use add_list_to_list" % (len(list_one), len(list_two))} + return { + "success": False, + "message": "Lists length must be the same. %d vs %d. 
Are you trying to add them to a single list? Use add_list_to_list" + % (len(list_one), len(list_two)), + } if len(sort_key_list_one) > 0: try: - list_one = sorted(list_one, key=lambda k: k.get(sort_key_list_one), reverse=True) + list_one = sorted( + list_one, key=lambda k: k.get(sort_key_list_one), reverse=True + ) except: pass if len(sort_key_list_two) > 0: try: - list_two = sorted(list_two, key=lambda k: k.get(sort_key_list_two), reverse=True) + list_two = sorted( + list_two, key=lambda k: k.get(sort_key_list_two), reverse=True + ) except: pass @@ -1633,7 +1671,11 @@ def merge_lists(self, list_one, list_two, set_field="", sort_key_list_one="", so list_one[i][key] = value elif isinstance(list_two[i], str) and list_two[i] == "": continue - elif isinstance(list_two[i], str) or isinstance(list_two[i], int) or isinstance(list_two[i], bool): + elif ( + isinstance(list_two[i], str) + or isinstance(list_two[i], int) + or isinstance(list_two[i], bool) + ): if len(set_field) == 0: list_one[i][base_key] = list_two[i] else: @@ -1648,8 +1690,21 @@ def merge_lists(self, list_one, list_two, set_field="", sort_key_list_one="", so return list_one - def merge_json_objects(self, list_one, list_two, set_field="", sort_key_list_one="", sort_key_list_two=""): - return self.merge_lists(list_one, list_two, set_field=set_field, sort_key_list_one=sort_key_list_one, sort_key_list_two=sort_key_list_two) + def merge_json_objects( + self, + list_one, + list_two, + set_field="", + sort_key_list_one="", + sort_key_list_two="", + ): + return self.merge_lists( + list_one, + list_two, + set_field=set_field, + sort_key_list_one=sort_key_list_one, + sort_key_list_two=sort_key_list_two, + ) def fix_json(self, json_data): try: @@ -1670,14 +1725,14 @@ def fix_json(self, json_data): else: json_data[key] = value - #elif isinstance(value, list): + # elif isinstance(value, list): # json_data[key] = value - #else: + # else: # json_data[key] = value # #for item in json_data[key]: # # if isinstance(item, dict): # # json_data[ - + for key in deletekeys: del json_data[key] @@ -1695,7 +1750,7 @@ def xml_json_convertor(self, convertto, data): try: if convertto == "json": - data = data.replace(" encoding=\"utf-8\"", " ") + data = data.replace(' encoding="utf-8"', " ") ans = xmltodict.parse(data) ans = self.fix_json(ans) json_data = json.dumps(ans) @@ -1705,11 +1760,7 @@ def xml_json_convertor(self, convertto, data): ans = readfromstring(data) return json2xml.Json2xml(ans, wrapper="all", pretty=True).to_xml() except Exception as e: - return { - "success": False, - "input": data, - "reason": f"{e}" - } + return {"success": False, "input": data, "reason": f"{e}"} def date_to_epoch(self, input_data, date_field, date_format): if isinstance(input_data, str): @@ -1727,9 +1778,9 @@ def date_to_epoch(self, input_data, date_field, date_format): def compare_relative_date( self, timestamp, date_format, equality_test, offset, units, direction ): - if timestamp== "None": + if timestamp == "None": return False - + if date_format == "autodetect": input_dt = dateutil_parser(timestamp).replace(tzinfo=None) elif date_format != "%s": @@ -1751,7 +1802,7 @@ def compare_relative_date( if utc_format.endswith("%z"): utc_format = utc_format.replace("%z", "Z") - #if date_format != "%s" and date_format != "autodetect": + # if date_format != "%s" and date_format != "autodetect": if date_format == "autodetect": formatted_dt = datetime.datetime.utcnow() + delta elif date_format != "%s": @@ -1766,24 +1817,24 @@ def compare_relative_date( comparison_dt = formatted_dt 
elif direction == "ago": comparison_dt = formatted_dt - delta - #formatted_dt - delta - #comparison_dt = datetime.datetime.utcnow() + # formatted_dt - delta + # comparison_dt = datetime.datetime.utcnow() else: comparison_dt = formatted_dt + delta - #comparison_dt = datetime.datetime.utcnow() + # comparison_dt = datetime.datetime.utcnow() diff = int((input_dt - comparison_dt).total_seconds()) if units == "seconds": diff = diff elif units == "minutes": - diff = int(diff/60) + diff = int(diff / 60) elif units == "hours": - diff = int(diff/3600) + diff = int(diff / 3600) elif units == "days": - diff = int(diff/86400) + diff = int(diff / 86400) elif units == "week": - diff = int(diff/604800) + diff = int(diff / 604800) result = False if equality_test == ">": @@ -1797,7 +1848,7 @@ def compare_relative_date( result = not (result) elif equality_test == "=": - result = diff == 0 + result = diff == 0 elif equality_test == "!=": result = diff != 0 @@ -1813,7 +1864,7 @@ def compare_relative_date( parsed_string = "%s %s %s %s" % (equality_test, offset, units, direction) newdiff = diff if newdiff < 0: - newdiff = newdiff*-1 + newdiff = newdiff * -1 return { "success": True, @@ -1821,11 +1872,10 @@ def compare_relative_date( "check": parsed_string, "result": result, "diff": { - "days": int(int(newdiff)/86400), + "days": int(int(newdiff) / 86400), }, } - def run_math_operation(self, operation): result = eval(operation) return result @@ -1840,10 +1890,14 @@ def escape_html(self, input_data): result = markupsafe.escape(mapping) return mapping - def check_datastore_contains(self, key, value, append, category=""): - return check_cache_contains(self, key, value, append, category) + def check_datastore_contains( + self, key, value, append, category="", return_values="true" + ): + return check_cache_contains(self, key, value, append, category, return_values) - def check_cache_contains(self, key, value, append, category=""): + def check_cache_contains( + self, key, value, append, category="", return_values="true" + ): org_id = self.full_execution["workflow"]["execution_org"]["id"] url = "%s/api/v1/orgs/%s/get_cache" % (self.url, org_id) data = { @@ -1862,7 +1916,10 @@ def check_cache_contains(self, key, value, append, category=""): allvalues = {} try: for item in self.local_storage: - if item["execution_id"] == self.current_execution_id and item["key"] == key: + if ( + item["execution_id"] == self.current_execution_id + and item["key"] == key + ): # Max keeping the local cache properly for 5 seconds due to workflow continuations elapsed_time = time.time() - item["time_set"] if elapsed_time > 5: @@ -1871,7 +1928,10 @@ def check_cache_contains(self, key, value, append, category=""): allvalues = item["data"] except Exception as e: - print("[ERROR] Failed cache contains for current execution id local storage: %s" % e) + print( + "[ERROR] Failed cache contains for current execution id local storage: %s" + % e + ) if isinstance(value, dict) or isinstance(value, list): try: @@ -1887,15 +1947,17 @@ def check_cache_contains(self, key, value, append, category=""): if str(append).lower() == "true": append = True else: - append = False + append = False + + include_values = str(return_values).lower() == "true" if "success" not in allvalues: - #get_response = requests.post(url, json=data, verify=False) + # get_response = requests.post(url, json=data, verify=False) pass try: if "success" not in allvalues: - #allvalues = get_response.json() + # allvalues = get_response.json() allvalues = self.shared_cache if "success" not in 
allvalues: @@ -1925,20 +1987,21 @@ def check_cache_contains(self, key, value, append, category=""): allvalues = set_response.json() self.shared_cache = self.preload_cache(key=key) - newvalue = data["value"] try: newvalue = json.loads(data["value"]) except json.JSONDecodeError: pass - return { + response = { "success": True, "found": False, "key": key, "search": value, - "value": newvalue, } + if include_values: + response["value"] = newvalue + return response except Exception as e: return { "success": False, @@ -1976,8 +2039,8 @@ def check_cache_contains(self, key, value, append, category=""): try: for item in parsedvalue: - #return "%s %s" % (item, value) - #self.logger.info(f"{item} == {value}") + # return "%s %s" % (item, value) + # self.logger.info(f"{item} == {value}") if str(item) == str(value): if not append: try: @@ -1986,25 +2049,31 @@ def check_cache_contains(self, key, value, append, category=""): newdata["data"] = allvalues self.local_storage.append(newdata) except Exception as e: - print("[ERROR] Failed in local storage append: %s" % e) + print( + "[ERROR] Failed in local storage append: %s" % e + ) - return { + response = { "success": True, "found": True, "reason": "Found and not appending!", "key": key, "search": value, - "value": allvalues["value"], } + if include_values: + response["value"] = allvalues["value"] + return response else: - return { + response = { "success": True, "found": True, "reason": "Found, was appending, but item already exists", "key": key, "search": value, - "value": allvalues["value"], } + if include_values: + response["value"] = allvalues["value"] + return response # Lol break @@ -2013,20 +2082,24 @@ def check_cache_contains(self, key, value, append, category=""): append = True if not append: - return { + response = { "success": True, "found": False, "reason": "Not found, not appending (2)!", "key": key, "search": value, - "value": allvalues["value"], } + if include_values: + response["value"] = allvalues["value"] + return response - #parsedvalue.append(value) + # parsedvalue.append(value) - #data["value"] = json.dumps(parsedvalue) + # data["value"] = json.dumps(parsedvalue) - if value not in allvalues["value"] and isinstance(allvalues["value"], list): + if value not in allvalues["value"] and isinstance( + allvalues["value"], list + ): self.cache_update_buffer.append(value) allvalues["value"].append(value) @@ -2052,14 +2125,16 @@ def check_cache_contains(self, key, value, append, category=""): except: pass - return { + response = { "success": True, "found": False, - "reason": f"Appended as it didn't exist", + "reason": "Appended as it didn't exist", "key": key, "search": value, - "value": newvalue, } + if include_values: + response["value"] = newvalue + return response except Exception as e: exception = e pass @@ -2069,10 +2144,10 @@ def check_cache_contains(self, key, value, append, category=""): "found": True, "reason": f"Failed to set append the value: {exception}. 
This should never happen", "search": value, - "key": key + "key": key, } - #return allvalues + # return allvalues except Exception as e: print("[ERROR] Failed check cache contains: %s" % e) @@ -2084,9 +2159,8 @@ def check_cache_contains(self, key, value, append, category=""): "found": False, } - return value.text + return value.text - ## Adds value to a subkey of the cache ## subkey = "hi", value = "test", overwrite=False ## {"subkey": "hi", "value": "test"} @@ -2124,13 +2198,17 @@ def change_cache_subkey(self, key, subkey, value, overwrite, category=""): try: allvalues = response.json() allvalues["key"] = key - #allvalues["value"] = json.loads(json.dumps(value)) + # allvalues["value"] = json.loads(json.dumps(value)) - if (value.startswith("{") and value.endswith("}")) or (value.startswith("[") and value.endswith("]")): + if (value.startswith("{") and value.endswith("}")) or ( + value.startswith("[") and value.endswith("]") + ): try: allvalues["value"] = json.loads(value) except json.decoder.JSONDecodeError as e: - self.logger.info("[WARNING] Failed inner value cache parsing: %s" % e) + self.logger.info( + "[WARNING] Failed inner value cache parsing: %s" % e + ) allvalues["value"] = str(value) else: allvalues["value"] = str(value) @@ -2151,7 +2229,18 @@ def get_datastore_value(self, key, category=""): def get_ioc(self, ioc, data_type=""): if len(data_type) == 0: - ioc_types = ["domains", "urls", "email_addresses", "ipv4s", "ipv6s", "ipv4_cidrs", "md5s", "sha256s", "sha1s", "cves"] + ioc_types = [ + "domains", + "urls", + "email_addresses", + "ipv4s", + "ipv6s", + "ipv4_cidrs", + "md5s", + "sha256s", + "sha1s", + "cves", + ] iocs = find_iocs(str(ioc)) for key, value in iocs.items(): @@ -2183,8 +2272,8 @@ def get_ioc(self, ioc, data_type=""): if allvalues["success"] == True and len(allvalues["value"]) > 0: allvalues["found"] = True else: - allvalues["success"] = True - allvalues["found"] = False + allvalues["success"] = True + allvalues["found"] = False try: parsedvalue = json.loads(allvalues["value"]) @@ -2220,8 +2309,8 @@ def get_cache_value(self, key, category=""): if allvalues["success"] == True and len(allvalues["value"]) > 0: allvalues["found"] = True else: - allvalues["success"] = True - allvalues["found"] = False + allvalues["success"] = True + allvalues["found"] = False try: parsedvalue = json.loads(allvalues["value"]) @@ -2260,9 +2349,11 @@ def search_datastore_category(self, input_list, key, category): try: input_list = json.loads(str(input_list)) except Exception as e: - returnvalue["reason"] = f"Input list is not a valid JSON list: {input_list}", + returnvalue["reason"] = ( + f"Input list is not a valid JSON list: {input_list}", + ) returnvalue["details"] = str(e) - return returnvalue + return returnvalue org_id = self.full_execution["workflow"]["execution_org"]["id"] cnt = -1 @@ -2272,23 +2363,31 @@ def search_datastore_category(self, input_list, key, category): try: item = json.loads(str(item)) except Exception as e: - self.logger.info("[ERROR][%s] Failed to parse item as JSON: %s" % (self.current_execution_id, e)) + self.logger.info( + "[ERROR][%s] Failed to parse item as JSON: %s" + % (self.current_execution_id, e) + ) continue input_list[cnt] = item if key not in item: - returnvalue["reason"] = "Couldn't find key '%s' in every item. Make sure to use a key that exists in every entry." % (key), + returnvalue["reason"] = ( + "Couldn't find key '%s' in every item. Make sure to use a key that exists in every entry." 
+ % (key), + ) return returnvalue - data.append({ - "workflow_id": self.full_execution["workflow"]["id"], - "execution_id": self.current_execution_id, - "authorization": self.authorization, - "org_id": org_id, - "key": str(item[key]), - "value": json.dumps(item), - "category": category, - }) + data.append( + { + "workflow_id": self.full_execution["workflow"]["id"], + "execution_id": self.current_execution_id, + "authorization": self.authorization, + "org_id": org_id, + "key": str(item[key]), + "value": json.dumps(item), + "category": category, + } + ) url = f"{self.url}/api/v2/datastore?bulk=true&execution_id={self.current_execution_id}&authorization={self.authorization}" response = requests.post(url, json=data, verify=False) @@ -2300,16 +2399,18 @@ def search_datastore_category(self, input_list, key, category): return returnvalue data = "" - try: + try: data = response.json() except json.decoder.JSONDecodeError as e: - return response.text + return response.text if "keys_existed" not in data: - returnvalue["error"] = "Invalid response from backend during bulk update of keys" + returnvalue["error"] = ( + "Invalid response from backend during bulk update of keys" + ) returnvalue["details"] = data - return returnvalue + return returnvalue not_found_keys = [] returnvalue["success"] = True @@ -2324,17 +2425,22 @@ def search_datastore_category(self, input_list, key, category): else: returnvalue["new"].append(datastore_item) - found = True + found = True break if not found: - print("[ERROR][%s] Key %s not found in datastore response, adding as new" % (self.current_execution_id, datastore_item[key])) - #returnvalue["new"].append(datastore_item) + print( + "[ERROR][%s] Key %s not found in datastore response, adding as new" + % (self.current_execution_id, datastore_item[key]) + ) + # returnvalue["new"].append(datastore_item) not_found_keys.append(datastore_item[key]) if len(not_found_keys) > 0: returnvalue["unhandled_keys"] = not_found_keys - returnvalue["reason"] = "Something went wrong updating the unhandled_keys. Please contact support@shuffler.io if this persists." + returnvalue["reason"] = ( + "Something went wrong updating the unhandled_keys. Please contact support@shuffler.io if this persists." 
+ ) return json.dumps(returnvalue, indent=4) @@ -2347,7 +2453,7 @@ def set_cache_value(self, key, value, category=""): value = json.dumps(value) except Exception as e: self.logger.info(f"[WARNING] Error in JSON dumping (set cache): {e}") - + if not isinstance(value, str): value = str(value) @@ -2367,20 +2473,24 @@ def set_cache_value(self, key, value, category=""): try: allvalues = response.json() allvalues["key"] = key - #allvalues["value"] = json.loads(json.dumps(value)) + # allvalues["value"] = json.loads(json.dumps(value)) - allvalues["existed"] = False + allvalues["existed"] = False if "keys_existed" in allvalues: for key_info in allvalues["keys_existed"]: if key_info["key"] == key: allvalues["existed"] = key_info["existed"] break - if (value.startswith("{") and value.endswith("}")) or (value.startswith("[") and value.endswith("]")): + if (value.startswith("{") and value.endswith("}")) or ( + value.startswith("[") and value.endswith("]") + ): try: allvalues["value"] = json.loads(value) except json.decoder.JSONDecodeError as e: - self.logger.info("[WARNING] Failed inner value cache parsing: %s" % e) + self.logger.info( + "[WARNING] Failed inner value cache parsing: %s" % e + ) allvalues["value"] = str(value) else: allvalues["value"] = str(value) @@ -2393,12 +2503,17 @@ def set_cache_value(self, key, value, category=""): self.logger.info("Value couldn't be parsed") return response.text - def convert_json_to_tags(self, json_object, split_value=", ", include_key=True, lowercase=True): + def convert_json_to_tags( + self, json_object, split_value=", ", include_key=True, lowercase=True + ): if isinstance(json_object, str): try: json_object = json.loads(json_object) except json.decoder.JSONDecodeError as e: - self.logger.info("Failed to parse list2 as json: %s. Type: %s" % (e, type(json_object))) + self.logger.info( + "Failed to parse list2 as json: %s. 
Type: %s" + % (e, type(json_object)) + ) if isinstance(lowercase, str) and lowercase.lower() == "true": lowercase = True @@ -2413,7 +2528,11 @@ def convert_json_to_tags(self, json_object, split_value=", ", include_key=True, parsedstring = [] try: for key, value in json_object.items(): - if isinstance(value, str) or isinstance(value, int) or isinstance(value, bool): + if ( + isinstance(value, str) + or isinstance(value, int) + or isinstance(value, bool) + ): if include_key == True: parsedstring.append("%s:%s" % (key, value)) else: @@ -2462,7 +2581,7 @@ def cidr_ip_match(self, ip, networks): try: ip_networks = list(map(ipaddress.ip_network, networks)) - #ip_address = ipaddress.ip_address(ip, False) + # ip_address = ipaddress.ip_address(ip, False) ip_address = ipaddress.ip_address(ip) except ValueError as e: return "IP or some networks are not in valid format.\nError: {}".format(e) @@ -2471,8 +2590,8 @@ def cidr_ip_match(self, ip, networks): result = {} result["ip"] = ip - result['networks'] = list(map(str, matched_networks)) - result['is_contained'] = True if len(result['networks']) > 0 else False + result["networks"] = list(map(str, matched_networks)) + result["is_contained"] = True if len(result["networks"]) > 0 else False return json.dumps(result) @@ -2488,12 +2607,12 @@ def get_hash_sum(self, value): sha256_value = "" try: - md5_value = hashlib.md5(str(value).encode('utf-8')).hexdigest() + md5_value = hashlib.md5(str(value).encode("utf-8")).hexdigest() except Exception as e: pass try: - sha256_value = hashlib.sha256(str(value).encode('utf-8')).hexdigest() + sha256_value = hashlib.sha256(str(value).encode("utf-8")).hexdigest() except Exception as e: pass @@ -2504,14 +2623,17 @@ def get_hash_sum(self, value): "sha256": sha256_value, } - return parsedvalue + return parsedvalue def run_oauth_request(self, url, jwt): headers = { "Content-Type": "application/x-www-form-urlencoded", } - data = "grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&assertion=%s" % jwt + data = ( + "grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&assertion=%s" + % jwt + ) return requests.post(url, data=data, headers=headers, verify=False).text @@ -2519,10 +2641,9 @@ def run_oauth_request(self, url, jwt): def get_jwt_from_file(self, file_id, jwt_audience, scopes, complete_request=True): allscopes = scopes - if "," in scopes: allscopes = " ".join(scopes.split(",")) - + # Service account key path filedata = self.get_file(file_id) if filedata["success"] == False: @@ -2530,49 +2651,43 @@ def get_jwt_from_file(self, file_id, jwt_audience, scopes, complete_request=True "success": False, "message": f"Failed to get file for ID {file_id}", } - + data = json.loads(filedata["data"], strict=False) - #sa_keyfile = "" + # sa_keyfile = "" sa_keyfile = data["private_key"] sa_email = data["client_email"] - + # The audience to target audience = jwt_audience - + """Generates a signed JSON Web Token using a Google API Service Account or similar.""" - def get_jwt(sa_keyfile, - sa_email, - audience, - allscopes, - expiry_length=3600): - + + def get_jwt(sa_keyfile, sa_email, audience, allscopes, expiry_length=3600): now = int(time.time()) - + # build payload payload = { # expires after 'expiry_length' seconds. # iss must match 'issuer' in the security configuration in your # swagger spec (e.g. service account email). It can be any string. - 'iss': sa_email, + "iss": sa_email, # aud must be either your Endpoints service name, or match the value # specified as the 'x-google-audience' in the OpenAPI document. 
- 'scope': allscopes, - 'aud': audience, + "scope": allscopes, + "aud": audience, "exp": now + expiry_length, - 'iat': now, - + "iat": now, # sub and email should match the service account's email address - 'sub': sa_email, - 'email': sa_email, + "sub": sa_email, + "email": sa_email, } - + # sign with keyfile - #signer = crypt.RSASigner.from_service_account_file(sa_keyfile) + # signer = crypt.RSASigner.from_service_account_file(sa_keyfile) signer = crypt.RSASigner.from_string(sa_keyfile) jwt_token = jwt.encode(signer, payload) return jwt_token - - + signed_jwt = get_jwt(sa_keyfile, sa_email, audience, allscopes) if str(complete_request).lower() == "true": @@ -2601,11 +2716,18 @@ def get_synonyms(self, input_type): "uuid", "teamid", "messageid", - ], - "title": ["title", "message", "subject", "name"], - "description": ["description", "status", "explanation", "story", "details", "snippet"], - "email": ["mail", "email", "sender", "receiver", "recipient"], - "data": [ + ], + "title": ["title", "message", "subject", "name"], + "description": [ + "description", + "status", + "explanation", + "story", + "details", + "snippet", + ], + "email": ["mail", "email", "sender", "receiver", "recipient"], + "data": [ "data", "ip", "domain", @@ -2617,9 +2739,9 @@ def get_synonyms(self, input_type): "value", "item", "rules", - ], - "tags": ["tags", "taxonomies", "labels", "labelids"], - "assignment": [ + ], + "tags": ["tags", "taxonomies", "labels", "labelids"], + "assignment": [ "assignment", "user", "assigned_to", @@ -2627,40 +2749,44 @@ def get_synonyms(self, input_type): "closed_by", "closing_user", "opened_by", - ], - "severity": [ + ], + "severity": [ "severity", "sev", "magnitude", "relevance", - ] + ], } - + return [] - + def find_key(self, inputkey, synonyms): inputkey = inputkey.lower().replace(" ", "").replace(".", "") for key, value in synonyms.items(): if inputkey in value: return key - + return inputkey - + def run_key_recursion(self, json_input, synonyms): - if isinstance(json_input, str) or isinstance(json_input, int) or isinstance(json_input, float): + if ( + isinstance(json_input, str) + or isinstance(json_input, int) + or isinstance(json_input, float) + ): return json_input, {} - + if isinstance(json_input, list): if len(json_input) != 1: return json_input, {} else: json_input = json_input[0] - - #new_list = [] - #for item in json_input: - #run_key_recursion(item, synonyms) - #new_dict[new_key], found_important = run_key_recursion(value, synonyms) - + + # new_list = [] + # for item in json_input: + # run_key_recursion(item, synonyms) + # new_dict[new_key], found_important = run_key_recursion(value, synonyms) + # Looks for exact key:value stuff in other format if len(json_input.keys()) == 2: newkey = "" @@ -2670,54 +2796,58 @@ def run_key_recursion(self, json_input, synonyms): newkey = value elif key == "value": newvalue = value - + if len(newkey) > 0 and len(newvalue) > 0: json_input[newkey] = newvalue try: del json_input["name"] except: pass - + try: del json_input["value"] except: pass - + try: del json_input["key"] except: pass - + important_fields = {} new_dict = {} for key, value in json_input.items(): new_key = self.find_key(key, synonyms) - + if isinstance(value, list): new_list = [] for subitem in value: - returndata, found_important = self.run_key_recursion(subitem, synonyms) - + returndata, found_important = self.run_key_recursion( + subitem, synonyms + ) + new_list.append(returndata) for subkey, subvalue in found_important.items(): - important_fields[subkey] = subvalue - + 
important_fields[subkey] = subvalue + new_dict[new_key] = new_list - + elif isinstance(value, dict): # FIXMe: Try to understand Key:Values as well by translating them # name/key: subject # value: This is a subject # will become: # subject: This is a subject - - new_dict[new_key], found_important = self.run_key_recursion(value, synonyms) - + + new_dict[new_key], found_important = self.run_key_recursion( + value, synonyms + ) + for subkey, subvalue in found_important.items(): important_fields[subkey] = subvalue else: new_dict[new_key] = value - + # Translated fields are added as important if key.lower().replace(" ", "").replace(".", "") != new_key: try: @@ -2727,31 +2857,33 @@ def run_key_recursion(self, json_input, synonyms): important_fields[new_key] = new_dict[new_key] except: important_fields[new_key] = new_dict[new_key] - - #break - + + # break + return new_dict, important_fields - + # Should translate the data to something more useful def get_standardized_data(self, json_input, input_type): if isinstance(json_input, str): json_input = json.loads(json_input, strict=False) - + input_synonyms = self.get_synonyms(input_type) - parsed_data, important_fields = self.run_key_recursion(json_input, input_synonyms) - + parsed_data, important_fields = self.run_key_recursion( + json_input, input_synonyms + ) + # Try base64 decoding and such too? for key, value in important_fields.items(): try: important_fields[key] = important_fields[key][key] except: pass - + try: important_fields[key] = base64.b64decode(important_fields[key]) except: pass - + return { "success": True, "original": json_input, @@ -2773,14 +2905,16 @@ def generate_random_string(self, length=16, special_characters=True): if str(special_characters).lower() == "false": characters = string.ascii_letters + string.digits + string.punctuation - password = ''.join(random.choice(characters) for i in range(length)) + password = "".join(random.choice(characters) for i in range(length)) return { "success": True, "password": password, } - - def run_ssh_command(self, host, port, user_name, private_key_file_id, password, command): + + def run_ssh_command( + self, host, port, user_name, private_key_file_id, password, command + ): ssh_client = paramiko.SSHClient() ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) @@ -2795,60 +2929,74 @@ def run_ssh_command(self, host, port, user_name, private_key_file_id, password, try: key_data = new_file["data"].decode() except Exception as e: - return {"success":"false","message":str(e)} + return {"success": "false", "message": str(e)} private_key_file = StringIO() private_key_file.write(key_data) private_key_file.seek(0) private_key = paramiko.RSAKey.from_private_key(private_key_file) - + try: - ssh_client.connect(hostname=host,username=user_name,port=port, pkey= private_key) + ssh_client.connect( + hostname=host, username=user_name, port=port, pkey=private_key + ) except Exception as e: - return {"success":"false","message":str(e)} + return {"success": "false", "message": str(e)} else: try: - ssh_client.connect(hostname=host,username=user_name,port=port, password=str(password)) + ssh_client.connect( + hostname=host, username=user_name, port=port, password=str(password) + ) except Exception as e: - return {"success":"false","message":str(e)} + return {"success": "false", "message": str(e)} try: stdin, stdout, stderr = ssh_client.exec_command(str(command)) try: - errorLog = stderr.read().decode(errors='ignore') + errorLog = stderr.read().decode(errors="ignore") except Exception as e: errorLog = 
f"Failed to read stderr {e}" try: - output = stdout.read().decode(errors='ignore') + output = stdout.read().decode(errors="ignore") except Exception as e: output = f"Failed to read stdout {e}" except Exception as e: - return {"success":"false","message":str(e)} + return {"success": "false", "message": str(e)} - return {"success":"true","output": output, "error_logs": errorLog} + return {"success": "true", "output": output, "error_logs": errorLog} def cleanup_ioc_data(self, input_data): # Remove unecessary parts like { and }, quotes etc input_data = str(input_data) input_data = input_data.replace("{", "") input_data = input_data.replace("}", "") - input_data = input_data.replace("\"", "") + input_data = input_data.replace('"', "") input_data = input_data.replace("'", "") input_data = input_data.replace("\t", " ") input_data = input_data.replace(" ", " ") input_data = input_data.replace("\n\n", "\n") - # Remove html tags - input_data = re.sub(r'<[^>]*>', '', input_data) + # Remove html tags + input_data = re.sub(r"<[^>]*>", "", input_data) return input_data - def parse_ioc(self, input_string, input_type="all"): - ioc_types = ["domains", "urls", "email_addresses", "ipv4s", "ipv6s", "ipv4_cidrs", "md5s", "sha256s", "sha1s", "cves"] - #ioc_types = ["ipv4s"] + ioc_types = [ + "domains", + "urls", + "email_addresses", + "ipv4s", + "ipv6s", + "ipv4_cidrs", + "md5s", + "sha256s", + "sha1s", + "cves", + ] + # ioc_types = ["ipv4s"] try: input_string = self.cleanup_ioc_data(input_string) @@ -2874,21 +3022,24 @@ def parse_ioc(self, input_string, input_type="all"): new_input_types.append(item) - ioc_types = new_input_types + ioc_types = new_input_types if len(ioc_types) == 0: input_type = "all" # Not used for anything after cleanup fixes - max_size = 7500000 - #if len(input_string) > max_size: + max_size = 7500000 + # if len(input_string) > max_size: # input_string = input_string[:max_size] - self.logger.info("[DEBUG] Parsing data of length %d with types %s. Max size: %d" % (len(input_string), ioc_types, max_size)) + self.logger.info( + "[DEBUG] Parsing data of length %d with types %s. Max size: %d" + % (len(input_string), ioc_types, max_size) + ) self.logger.info(f"STRING: {input_string}") - #iocs = find_iocs(str(input_string), included_ioc_types=ioc_types) + # iocs = find_iocs(str(input_string), included_ioc_types=ioc_types) iocs = find_iocs(str(input_string)) - #self.logger.info("[DEBUG] Found %d ioc types" % len(iocs)) + # self.logger.info("[DEBUG] Found %d ioc types" % len(iocs)) newarray = [] for key, value in iocs.items(): @@ -2925,7 +3076,9 @@ def parse_ioc(self, input_string, input_type="all"): elif "ip" in item["data_type"]: item["data_type"] = "ip" try: - item["is_private_ip"] = ipaddress.ip_address(item["data"]).is_private + item["is_private_ip"] = ipaddress.ip_address( + item["data"] + ).is_private except: pass @@ -2935,25 +3088,24 @@ def parse_ioc(self, input_string, input_type="all"): return "Failed to parse IOC's: %s" % e return newarray - def split_text(self, text): # Split text into chunks of 10kb. Add each 10k to array # In case e.g. 1.2.3.4 lands exactly on 20k boundary, it may be useful to overlap here. 
# (just shitty code to reduce chance of issues) while still going fast arr_one = [] - max_len = 5000 + max_len = 5000 current_string = "" - overlaps = 100 + overlaps = 100 for i in range(0, len(text)): current_string += text[i] if len(current_string) > max_len: # Appending just in case even with overlaps - if len(text) > i+overlaps: - current_string += text[i+1:i+overlaps] + if len(text) > i + overlaps: + current_string += text[i + 1 : i + overlaps] else: - current_string += text[i+1:] + current_string += text[i + 1 :] arr_one.append(current_string) current_string = "" @@ -2961,13 +3113,13 @@ def split_text(self, text): if len(current_string) > 0: arr_one.append(current_string) - return arr_one + return arr_one def _format_result(self, result): final_result = {} - + for res in result: - for key,val in res.items(): + for key, val in res.items(): if key in final_result: if isinstance(val, list) and len(val) > 0: for i in val: @@ -2975,7 +3127,7 @@ def _format_result(self, result): elif isinstance(val, dict): if key in final_result: if isinstance(val, dict): - for k,v in val.items(): + for k, v in val.items(): val[k].append(v) else: final_result[key] = val @@ -2985,36 +3137,49 @@ def _format_result(self, result): # See function for how it works~: parse_ioc_new(..) def _with_concurency(self, array_of_strings, ioc_types): results = [] - #start = time.perf_counter() + # start = time.perf_counter() # Workers dont matter..? - # What can we use instead? + # What can we use instead? workers = 4 with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor: # Submit the find_iocs function for each string in the array - futures = [executor.submit( - find_iocs, - text=string, - included_ioc_types=ioc_types, - ) for string in array_of_strings] + futures = [ + executor.submit( + find_iocs, + text=string, + included_ioc_types=ioc_types, + ) + for string in array_of_strings + ] # Wait for all tasks to complete concurrent.futures.wait(futures) # Retrieve the results if needed results = [future.result() for future in futures] - + return self._format_result(results) # FIXME: Make this good and actually faster than normal # For now: Concurrency doesn't make it faster due to GIL in python. 
- # May need to offload this to an executable or something + # May need to offload this to an executable or something def parse_ioc_new(self, input_string, input_type="all"): if input_type == "": input_type = "all" - ioc_types = ["domains", "urls", "email_addresses", "ipv4s", "ipv4_cidrs", "md5s", "sha256s", "sha1s", "cves"] + ioc_types = [ + "domains", + "urls", + "email_addresses", + "ipv4s", + "ipv4_cidrs", + "md5s", + "sha256s", + "sha1s", + "cves", + ] if input_type == "" or input_type == "all": ioc_types = ioc_types @@ -3028,7 +3193,9 @@ def parse_ioc_new(self, input_string, input_type="all"): input_string = str(input_string) if len(input_string) > 10000: - iocs = self._with_concurency(self.split_text(input_string), ioc_types=ioc_types) + iocs = self._with_concurency( + self.split_text(input_string), ioc_types=ioc_types + ) else: iocs = find_iocs(input_string, included_ioc_types=ioc_types) @@ -3037,7 +3204,7 @@ def parse_ioc_new(self, input_string, input_type="all"): if input_type != "all": if key not in input_type: continue - + if len(value) == 0: continue @@ -3071,7 +3238,9 @@ def parse_ioc_new(self, input_string, input_type="all"): newarray[i]["data_type"] = "ip" try: - newarray[i]["is_private_ip"] = ipaddress.ip_address(item["data"]).is_private + newarray[i]["is_private_ip"] = ipaddress.ip_address( + item["data"] + ).is_private except Exception as e: pass @@ -3085,15 +3254,12 @@ def parse_ioc_new(self, input_string, input_type="all"): def merge_incoming_branches(self, input_type="list"): wf = self.full_execution["workflow"] if "branches" not in wf or not wf["branches"]: - return { - "success": False, - "reason": "No branches found" - } + return {"success": False, "reason": "No branches found"} if "results" not in self.full_execution or not self.full_execution["results"]: return { "success": False, - "reason": "No results for previous actions not found" + "reason": "No results for previous actions not found", } if not input_type: @@ -3101,7 +3267,7 @@ def merge_incoming_branches(self, input_type="list"): branches = wf["branches"] cur_action = self.action - #print("Found %d branches" % len(branches)) + # print("Found %d branches" % len(branches)) results = [] for branch in branches: @@ -3134,13 +3300,13 @@ def merge_incoming_branches(self, input_type="list"): continue newlist.append(subitem) - #newlist.append(item) + # newlist.append(item) results = newlist elif input_type == "dict": new_dict = {} for item in results: - if not isinstance(item, dict): + if not isinstance(item, dict): continue new_dict = self.merge_lists(new_dict, item) @@ -3149,7 +3315,7 @@ def merge_incoming_branches(self, input_type="list"): else: return { "success": False, - "reason": "No results from source branches with type %s" % input_type + "reason": "No results from source branches with type %s" % input_type, } return results @@ -3158,10 +3324,7 @@ def bodyparse_test(self, body): return body def list_cidr_ips(self, cidr): - defaultreturn = { - "success": False, - "reason": "Invalid CIDR address" - } + defaultreturn = {"success": False, "reason": "Invalid CIDR address"} if not cidr: return defaultreturn @@ -3187,11 +3350,7 @@ def list_cidr_ips(self, cidr): return defaultreturn ips = [str(ip) for ip in net] - returnvalue = { - "success": True, - "amount": len(ips), - "ips": ips - } + returnvalue = {"success": True, "amount": len(ips), "ips": ips} return returnvalue @@ -3214,12 +3373,13 @@ def switch(self, conditions): # Loop conditions # Return them without a loop to make it EASY to understand - # Validation 
should be: + # Validation should be: # Continuation based on .id.valid # .valid -> true/false # If no id exists, use name? return to_return + if __name__ == "__main__": Tools.run() From 295269865cdab35f1be1dda5b3492e2f9ec6fd54 Mon Sep 17 00:00:00 2001 From: Frikky Date: Tue, 10 Feb 2026 12:29:52 +0100 Subject: [PATCH 11/15] Added run_agent action --- shuffle-ai/1.0.0/src/app.py | 105 ++++++++++++++++++++++----- shuffle-tools/1.2.0/requirements.txt | 2 +- shuffle-tools/1.2.0/src/app.py | 4 + 3 files changed, 91 insertions(+), 20 deletions(-) diff --git a/shuffle-ai/1.0.0/src/app.py b/shuffle-ai/1.0.0/src/app.py index 8313ab6e..e5f5e9bc 100644 --- a/shuffle-ai/1.0.0/src/app.py +++ b/shuffle-ai/1.0.0/src/app.py @@ -427,6 +427,45 @@ def gpt(self, input_text): "reason": "Not implemented yet" } + def run_agent(self, input_data, actions=None, apps=None): + prepared_format = { + "id": self.action["id"], + "params": { + "tool_name": self.action["app_name"], + "tool_id": self.action["app_id"], + "environment": self.action["environment"], + "input": { + "text": input_data, + } + }, + } + + if actions: + prepared_format["params"]["tool_name"] = actions + + if apps: + pass + + baseurl = f"{self.url}/api/v1/agent?execution_id={self.current_execution_id}&authorization={self.authorization}&action_id={self.action['id']}" + self.logger.info("[DEBUG] Running agent action with URL '%s'" % (baseurl)) + + headers = {} + request = requests.post( + baseurl, + json=prepared_format, + headers=headers, + ) + + # Random sleep timer to force delay + time.sleep(2) + # Gets into waiting state on backend + return json.dumps({ + "app_run": True, + "input_prompt": prepared_format, + "status": request.status_code, + "body": request.text, + }) + def run_schemaless(self, category, action, app_name="", fields=""): self.logger.info("[DEBUG] Running schemaless action with category '%s' and action label '%s'" % (category, action)) @@ -477,27 +516,55 @@ def run_schemaless(self, category, action, app_name="", fields=""): else: fields = str(fields).strip() - if not fields.startswith("{") and not fields.startswith("["): - fields = json.dumps({ - "data": fields, - }) - - try: - loadedfields = json.loads(fields) - for key, value in loadedfields.items(): - data["fields"].append({ - "key": key, - "value": value, + # Valid format: + # {"field1": "value1", "field2": "value2"} + # field1=value1&field2=value2 + # field1:value1\nfield2:value2 + + cursplit = None + if "\\n" in fields and not fields.startswith("{") and not fields.startswith("["): + cursplit = "\\n" + elif ("=" in fields or ":" in fields) and not fields.startswith("{") and not fields.startswith("["): + cursplit = "&" + + if cursplit: + newfields = [] + for line in fields.split(cursplit): + splitkey = None + if "=" in line: + splitkey = "=" + elif ":" in line: + splitkey = ":" + + if splitkey: + parts = line.split(splitkey, 1) + newfields.append({ + "key": parts[0].strip(), + "value": splitkey.join(parts[1:]).strip(), + }) + + data["fields"] = newfields + else: + if not fields.startswith("{") and not fields.startswith("["): + fields = json.dumps({ + "data": fields, }) - except Exception as e: - self.logger.info("[ERROR] Failed to load fields as JSON: %s" % e) - return json.dumps({ - "success": False, - "reason": "Ensure 'Fields' are valid JSON", - "details": "%s" % e, - }) - + try: + loadedfields = json.loads(fields) + for key, value in loadedfields.items(): + data["fields"].append({ + "key": key, + "value": value, + }) + + except Exception as e: + self.logger.info("[ERROR] 
Failed to load fields as JSON: %s" % e) + return json.dumps({ + "success": False, + "reason": "Ensure 'Fields' are valid JSON", + "details": "%s" % e, + }) #baseurl = "%s/api/v1/apps/categories/run" % self.base_url baseurl = "%s/api/v1/apps/categories/run" % self.url diff --git a/shuffle-tools/1.2.0/requirements.txt b/shuffle-tools/1.2.0/requirements.txt index 4fe45011..94c838b3 100644 --- a/shuffle-tools/1.2.0/requirements.txt +++ b/shuffle-tools/1.2.0/requirements.txt @@ -8,4 +8,4 @@ json2xml==5.0.5 ipaddress==1.0.23 google.auth==2.37.0 paramiko==3.5.0 -shuffle-sdk==0.0.31 +shuffle-sdk==0.0.33 diff --git a/shuffle-tools/1.2.0/src/app.py b/shuffle-tools/1.2.0/src/app.py index 85275cf2..0bea8a3f 100644 --- a/shuffle-tools/1.2.0/src/app.py +++ b/shuffle-tools/1.2.0/src/app.py @@ -235,6 +235,10 @@ def send_email_shuffle(self, apikey, recipients, subject, body, attachments=""): def repeat_back_to_me(self, call): return call + def repeat_back_to_me2(self, body): + print("call:", body) + return body + def dedup_and_merge(self, key, value, timeout, set_skipped=True): timeout = int(timeout) key = str(key) From e5b00f775e1fed039115bd8299ab049fc3c7b285 Mon Sep 17 00:00:00 2001 From: Frikky Date: Tue, 10 Feb 2026 12:36:10 +0100 Subject: [PATCH 12/15] Bumped shuffle-ai to be 1.1.0 to force re-downloads --- shuffle-ai/1.0.0/Dockerfile | 54 --- shuffle-ai/1.0.0/api.yaml | 167 -------- shuffle-ai/1.0.0/requirements.txt | 5 - shuffle-ai/1.0.0/src/app.py | 609 ------------------------------ shuffle-ai/1.0.0/upload.sh | 16 - 5 files changed, 851 deletions(-) delete mode 100644 shuffle-ai/1.0.0/Dockerfile delete mode 100644 shuffle-ai/1.0.0/api.yaml delete mode 100644 shuffle-ai/1.0.0/requirements.txt delete mode 100644 shuffle-ai/1.0.0/src/app.py delete mode 100755 shuffle-ai/1.0.0/upload.sh diff --git a/shuffle-ai/1.0.0/Dockerfile b/shuffle-ai/1.0.0/Dockerfile deleted file mode 100644 index 9b059f27..00000000 --- a/shuffle-ai/1.0.0/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -FROM python:3.10-alpine - -# Install all alpine build tools needed for our pip installs -#RUN apk --no-cache add --update alpine-sdk libffi libffi-dev musl-dev openssl-dev git poppler-utils - -# Install all of our pip packages in a single directory that we can copy to our base image later -RUN mkdir /install -WORKDIR /install - -# Switch back to our base image and copy in all of our built packages and source code -#COPY --from=builder /install /usr/local -COPY src /app -COPY requirements.txt /requirements.txt -RUN python3 -m pip install -r /requirements.txt - -# Install any binary dependencies needed in our final image -# RUN apk --no-cache add --update my_binary_dependency -RUN apk --no-cache add jq git curl - -ENV SHELL=/bin/bash - -### Install Tesseract -ENV CC /usr/bin/clang -ENV CXX /usr/bin/clang++ -ENV LANG=C.UTF-8 -ENV TESSDATA_PREFIX=/usr/local/share/tessdata - -# Dev tools -WORKDIR /tmp -RUN apk update -RUN apk upgrade -RUN apk add file openssl openssl-dev bash tini leptonica-dev openjpeg-dev tiff-dev libpng-dev zlib-dev libgcc mupdf-dev jbig2dec-dev -RUN apk add freetype-dev openblas-dev ffmpeg-dev linux-headers aspell-dev aspell-en # enchant-dev jasper-dev -RUN apk add --virtual .dev-deps git clang clang-dev g++ make automake autoconf libtool pkgconfig cmake ninja -RUN apk add --virtual .dev-testing-deps -X http://dl-3.alpinelinux.org/alpine/edge/testing autoconf-archive -RUN ln -s /usr/include/locale.h /usr/include/xlocale.h - -RUN apk add tesseract-ocr -RUN apk add poppler-utils - -# Install from main -RUN mkdir 
/usr/local/share/tessdata -RUN mkdir src -RUN cd src -RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata -RUN git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git - -#RUN curl -fsSL https://ollama.com/install.sh | sh -#RUN ollama pull llama3.2 -#RUN cd tesseract && ./autogen.sh && ./configure --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl && make && make install && cd /tmp/src - -# Finally, lets run our app! -WORKDIR /app -CMD ["python", "app.py", "--log-level", "DEBUG"] diff --git a/shuffle-ai/1.0.0/api.yaml b/shuffle-ai/1.0.0/api.yaml deleted file mode 100644 index b9634f39..00000000 --- a/shuffle-ai/1.0.0/api.yaml +++ /dev/null @@ -1,167 +0,0 @@ ---- -app_version: 1.0.0 -name: Shuffle AI -description: An EXPERIMENTAL AI tool app for Shuffle -tags: - - AI - - Shuffle - - LLM -categories: - - AI - - LLM - - Shuffle -contact_info: - name: "@frikkylikeme" - url: https://shuffler.io - email: support@shuffler.io -actions: - #- name: run_llm - # description: "Runs a local LLM, with a GPU or CPU (slow). Default model is set up in Dockerfile" - # parameters: - # - name: input - # description: "The input question to the model" - # required: true - # multiline: true - # example: "" - # schema: - # type: string - # - name: system_message - # description: "The system message use, if any" - # required: false - # multiline: false - # example: "" - # schema: - # type: string - - - name: shuffle_cloud_inference - description: Input ANY kind of data in the format you want, and the format you want it in. Default is a business-y email. Uses ShuffleGPT, which is based on OpenAI and our own model. - parameters: - - name: apikey - description: Your https://shuffler.io apikey - required: true - multiline: false - example: "" - schema: - type: string - - name: text - description: The text you want to be converted (ANY format) - required: true - multiline: true - example: "Bad IPs are 1.2.3.4 and there's no good way to format this. JSON works too!" - schema: - type: string - - name: formatting - description: The format to use. - required: false - multiline: true - example: "Make it work as a ticket we can put in service now that is human readable for security analysts" - schema: - type: string - returns: - schema: - type: string - - name: generate_report - description: Input ANY kind of data in the format you want, and it will make an HTML report for you. This can be downloaded from the File location. - parameters: - - name: apikey - description: Your https://shuffler.io apikey - required: true - multiline: false - example: "" - schema: - type: string - - name: input_data - description: The text you want to be converted (ANY format) - required: true - multiline: true - example: "Bad IPs are 1.2.3.4 and there's no good way to format this. JSON works too!" 
- schema: - type: string - - name: report_title - description: The report title to be used in the report - required: true - multiline: true - example: "Statistics for October" - schema: - type: string - - name: report_name - description: The name of the HTML file - required: false - multiline: true - example: "statistics.html" - schema: - type: string - returns: - schema: - type: string - - name: extract_text_from_pdf - description: Returns text from a pdf - parameters: - - name: file_id - description: The file to find text in - required: true - multiline: false - example: "file_" - schema: - type: string - returns: - schema: - type: string - - name: extract_text_from_image - description: Returns text from an image - parameters: - - name: file_id - description: The file to find text in - required: true - multiline: false - example: "file_" - schema: - type: string - returns: - schema: - type: string - - name: run_schemaless - description: Runs an automatically translated action - parameters: - - name: category - description: The category the action is in - required: true - multiline: false - schema: - type: string - - name: action - description: The action label to run - required: true - multiline: false - schema: - type: string - - name: app_name - description: The app to run the action in - required: false - multiline: false - schema: - type: string - - name: fields - description: The additional fields to add - required: false - multiline: false - schema: - type: string - returns: - schema: - type: string - - name: transcribe_audio - description: Returns text from audio - parameters: - - name: file_id - description: The file containing the audio - required: true - multiline: false - example: "file_" - schema: - type: string - returns: - schema: - type: string - -large_image: 
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAK4AAACuCAYAAACvDDbuAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAABmJLR0QA/wD/AP+gvaeTAAAAB3RJTUUH5AgXDjM6hEZGWwAAD+lJREFUeNrtXb/vJTcRH7/v3iVBCqRBiCAQAtHwq4AWRElHwX8AoqbmXwDRpiH/QyQkGoogUSAhKIKUAE1IdSRSREhQQk7c3XtD8X55vePxjNfe3bk3H+nu+96uPf54POtnj8fe8OQX30JwOIxhtzYBh6MGOsPF0z9p2iWwpd8LjX6W5vWUYaiqlBuvLT5b5TQDPlRwmMSAABBg+kCer+XuAeQf4tL9tAxJ/hIfZGSm8rhyEfjytfxr9FeSX+KjvVfipNVpWlaPNhsAEPCS7Ao8FYnRlbO4ksLnjiSQvIanv4FNjwJ5pXIlMq6MQpIqqPnQKQKbjuPDtZlG55o6UHXWtVncZZTbbNBVB1P5dJYguCbJJ1WjOG8PVOioSm5HPrVt1rwuyN+K+PSZnNV1M/MmEFubfFjjU9tmK9XBJ2cOk3DDdZiEG67DJOrGuA7HyvAe12ESAxa73KPrN1z8gUikCCdvcD5NXnpQpA8nNhh9m5Yn4ZMrV8dHV/8a/dRA0x419a3lI9GBtM2GcrGYFXRNUU5TyluTOpdXwqeUt6YOpby9DUTLZylOcRlzdBTf2yV3ZBFOmKSHQh5KpjSSSpqG4s6VkUubqw8W8knTSnWk0Y+2jF5tlmuDUloJn6T8gRVcEpJ+3srChHSNt8RJsq4p+S41LC13KTcu/RJt1pLPKY1Pzhwm4YbrMAk3XIdJTMe4aeCjJhBVk0YiQ1MWZHhLgmO5QNVWfKRlavlIIQnurQmcnaMjSbBxhtMwYUxODpLcl2tUhvPlNE6VkiuoFVLXKT6ZfBjxRIIzOSlgWpLSB8uZ0g3BjeVDlFGEos0mfKKL7CQrY2ES7pM2i/OX22w4/sWReEhEnUOTxx3a+FrawQGZh04/rWe6oJBKo5zT4zLjPHE9ZHym5YzToogzfQcmfLgOhuLF/Sjm2izVDyXnrKtcmmmdaKumf+RyCw5Xn7OmzQaJF0fiEZG6BjXpYUYaSVkaPrXeHe4eVaZEr3Prqrmmrbc2T8lrmOMjn5xJHeJLYkk+PfzNTxOflrwF0EeHbU0Zt2wsW+PTkncB7g5zmMSwzUfS4eDhPa7DJK5jXGorsnZxonbRIbeAoOUjkUvlp+qxFp9YNuWL0nBqsVCkqUsrHQnuX+Nx5/qcJDI0kWgtJh7ihYCN8aG+13DqOXlbWUfD+fN0AUEmp3RcUWlVEwCynb5ssYLnxHViJT6ULCykb8EnzUfpqBWfVAdcnt5tprGhIe10WnjHpB2FtMPWcpM66yXyOad4Lz4Srq34SHhwZfRos1w9Y/jkzGESvj3dYRLe4zpMwg3XYRJuuA6T4M/Hzfk/OGd9OP2HOE2f8wtBlCebJrkfp+Gc3AGmiSiuaVlpwkmajL4osPUm9FMqIzBOJolfjGuzEtdUwWl53Dm7Eh9pzIdps+FiYJyi1N+Rvs/6OLCQBul8Ip8R08ik3EwhLZz1Wv8XmU7ZZqX7OT2gUIB2oaRBm+2ovDm5nM+ulEeiD8yka8UnJ1PCP82r9YWW8iCU5XO8W/PhPmvllNKW7lEyszsgNKuzkspJFZFL15uPtIweq7A1xiKpz1J8tGXP+dE53/fJmcMk6hcgJO8XqokEKi5uYzTG29LqSev95JqyKsoOOxjNpKQBD7VFc5GBJRsi+NQHkkv6+7m/UxTufwLCCy+CbAruyOLDdwEf/uf6vbbNJukzlogZC6wMdhAcM7ohHPawe/GrcO+HPwe4u782G7sIAE9++0vYv/YKwO6usfCaka0etgwXAGB3D8JznwIYnlmbiW0M92FbQy0d+MmZ3Xo5JDDcvuXJ2ZYqtyUuTwuM6nSXctcufHCOZqkjPScXhbIcdeD0XUpfKyNNy8nlyhuozLkM8XxR6pjm7tc4Fdx620I7lWq10JCm0ZanWoBwm3FsBe1WznpadbTg4A9PI2xx7FUKHopQjg7TKqNnpbioIUcFUGUsy1CS8fFYBYdJuOE6TMIN12ESgyiKiwO1bQOJe1w+6p42Etmhwmi6kLZXfC2G9IUj2vulY2wIPrv4onRhIXcRqS0DiWxkhF0uIb37wG22LRCSuVCyekC2GSXj9CG3YyT+krWh+KPAhkTvgGDKqbqnWbBwY+2Pnm3Wy4aMRYc1MuPDvp0skwgAh8PaJGbh5k4kx0f/hce/ewnw/QenXQCTFJDfQy45PzFNn5NHsoPy/u6gzE+nObzz91P9Z+6kWAm2zg6bDMoq8OQxHN78Axze/htAaB1EbQhhdzyfgRqIGoCxoUIjhDuA3ZDpcR0W4C3nMInbNVw7v4oOAsehArVFPL0uOjMM+DlM+pk7t7/BDuwcJsM6gcM7WweOX05nFCHNi12ASRfLo3QaX9O0GWTylOTnZIMwf4YPPTlD4iMm7aZwAGOUf3Rf48wjHNzVOMkKFA8pp0RHZ1mjdihs5R61PWbsWlphgs/E5gptNvFfSLY8QPk7dVbh+UNg8qfnJsZ8Bo0hzF0Y2Nqvc0s+Vbs5YL5OLfPRcorT2hvjtuxyHWZhzHCX6AMcFtB2B0RvtKZqqe6OEYz1uA7HEbdruN7ZmsZtGq4brXnQhlsbLFkDrY9mC9giH41/dSlONfeEIBcgss7nXopInPdkYN95J3XD1bMgkJUNFOxsDNLgyiynhYyX5dnAhnLyhzmO4V7IO8+xyZEgx5UqvJ41rOUTdhBOr2w6KjZc+B1FBkLGVUoAABQEcmPu6rPPw73v/gh2n/wMANYEhAd4/NqvYf/Wn5pEyPW2IUrOzQWSHyHdkEJgN8D97/0Edp/7GgDu9fnDDvD9t+HRqy8BPvxQ9i6xEXUEuPcMDF//Puw+/aVqDewfvA77f/zx9M40e7jNeNw5CDu4++K34e4r36kWcXj3TYDfvwz8D79ml1clDPuxx9FhuUik0rblVihFWLX+7ZFEXE2ioLBNg9fUSRopVsOjJbioskZlDuyAvmflpOWsOUNu/cBQ8jW/1A0np11RG+GjwG36cQHqFWnBcG4Axgx37d/I1uXXcvCnx6BXoQXf3mOAzvVpooJzaOcWdKBH1fZ07dCsFZpNgmfZbaOJ2dxnpwkNFC3C9MBcGxo0OugxwV8LWKm5lg9sFQdszKGhLAla2dCuduuOZcypx+UXdk0OK5e/hXKNTc4cjiPGhtvTX1njI6Z2+vbuKtaKspLooXdkXs1u5yUR7/LdROMsraSSIfTa6pqWodE9Mvla6sCI8d7uUMEXIEzjdg3XYRr2osOePIbDR+9BGO7re78QAD/+AODwpK5sBDg6dGyGAtL1sYnLGDe3+2BNTNycYQf7B2/Aw5d/XB9HejjA4YN3jgHUNQ132MOTv/wG9v98A+CgFBCO/+FH/wJ89PBaS
Y1OULZzQyQL2skayVwg/7Dk3Ky2IlcEgEcfw/7dt+YJnRP1f9jDoz+/AvM0FU4c1u8mes59e+ZXDhXmPE+tForD+lH73Q6EluiozfaldnzWQUWQzdprPk87lg44nkTKN+DT/10S7lW4VYz8wWucOTAPtl5e4mgfjmu0/b3HdZiEG67DJNxwbxlGhwkAuZeXAJS3Qpfemq7dds1tS5dsbc6dAyQpS5uGe+lKrJLSGUqlCb2GcwUuCxBzt71T2/g7t9mQniofv0yjWOtMYdSLM6Sy0pd5iLdFSQtUyiJtRnjmGOdhqq5bo5WzUXAYzns2Lu2tjaqb0WaTHRBrR9cvEVG4VF3WkLsGnzXqohzjbk3dt4hG/jDDxy8BLL5y5miBZi1wa9vT14dJ0o2qft6/1GhQZ1SV9uJxd3cQ7j+XD7RJ40JK38/XAPKz4ly+OG+KwOTDwn0uDSKEZ58/vgH+hmHLcA97uPvCN+G5H/wMoCaQ/KkAAtzdg/DCZ9cmsipsGS4ce5u7z38DYHhmbTL2YfjBH28DOM80s+MoxllVvfkwKudSbiL0dB0NTya2iGpNYmIzl+/EdexjQ8PEGE4FhdPHMAlbLhcsdWaPnfDEAxQJnbx53TEPJ51j3N7CrEfbSNt+arzXt57X2RBx94LsUGHOGRQtF7Fa8HFQQOabJmc5XQ8b8iAbh0mYNFzvdefD+nRhyPowqWitc2VbRyutGCF18+ilU2mEXWX51zFuKbqlZ/RLy0gixzagiS6sgL2hghuwAywarsMBxgzXO9u2sBzZWHwHRLwrQ5rWYQBIfuwCKnZJEpvEYSg9dRoncnejtdxFbBRLqFQzr5fSudH3nDmOaH26yHIwNcZ1NIZNmwWArYU1Fg8HDLB/7wH879VfAey2Rd0a9g/+2ubUyZUOdAz//umXjT136GPd2cDNnM9bC4Pd1gbOx3WsDh/jOkzCDddhEpcjmKiFhvGLQwDitJNrYTz05H7MS+N56hiq0mbYCfeIj2STb2s+cSJEOrguJ4fScaneOW7kOWZJm4VCmaPFg8wKgcSGuLpzR49Rerm8vIRaaECgvyB1Tbl9qOZoMiykHeVhVoZKwW9N+CSJuPwsH4YY12aTa5TxYyZPpsxSDG/Rhgp1lyxUnK/7UMFhEm64DpNIlnzTAdXcsJml8rdO1yt/K+R45EJUluS9zHaWITuQJb9rsVT+HvuKe+RvhdIIcE3ey4Rj+VDBYRJuuA6TcMN1mMT15SWMZ5h10Oc86+dr50s14QWch7rEh5PHef+psgsyqB0iI2e+hE+pDlpvvkQ/uVUMDfdSnTq12TA58injFUdOMPB5AeiALtHcUrstXrqSINnaoVjxyE5ra1ZipHMsTV2kMiQ8NDw7tdmqQ4WtzNEd9uBjXIdJuOE6TMLoy0sct46KHndNS6d2pW5tp+rW+Jw5rVl2qpP5Oqrcnr52w9RMgbfA8db5tAsp8DGuwyTaGW6DB7ppn9CCzxKnvKz9Kz7j/prUi0cwqQLQDBtvrp5uvMc/Wf00oFAT5FjscbcwMloCt1LPWvTUT41sH+M6TMIN12ESw3UPd8gPtrh7JeTyXvZGn0KD0jSlMms5Sfhw92vkUvXT5tPWt3WbSfjMsSFl3ujlJdy+4xkjnFze+PWrNWXWclqaT6t82vq2bjMJnzk2pMzrQwWHSbjhOkzCDdchxpZchpezwySQvHhiyVMLevPRctXwqeWmfcv5GaVTGKRy557YIHnhpETeoCl05grhbPlL89HK1vCp5darvZbgo+XEwYcKDpNww3WYxC6/U5PY5oun66MzPHH8L05PpqHKghn+TpjyictkZQLPh4u6yeknvXeWU+JD6TDHJ/cbn93Bi8nnDKdJm8EG2+zIZwBudlbjUOYOpj1frClPwyf3OZuXuaEx3lgWZixKxIfZ911rvJO65PRFVmZjbYY+VHDYhBuuwyTccB0mcdkB0cr5z70pW/pm7Bo+LesgqUsrPjVye9WXkqld8FiizRCi6LBWjmTRPGGG/JZ5ejvoa1ai1qwvlWarbeZDBYdJuOE6TKKP4W7xJdFb4+R8ZvH5P852gxhpwOZ9AAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDIwLTA4LTIzVDE0OjUyOjAwKzAyOjAwetRgVgAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyMC0wOC0yM1QxNDo1MTo1OCswMjowMJuxI+oAAAAASUVORK5CYII= diff --git a/shuffle-ai/1.0.0/requirements.txt b/shuffle-ai/1.0.0/requirements.txt deleted file mode 100644 index 62bee6e9..00000000 --- a/shuffle-ai/1.0.0/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -shuffle-sdk - -pytesseract -pdf2image -pypdf2 diff --git a/shuffle-ai/1.0.0/src/app.py b/shuffle-ai/1.0.0/src/app.py deleted file mode 100644 index e5f5e9bc..00000000 --- a/shuffle-ai/1.0.0/src/app.py +++ /dev/null @@ -1,609 +0,0 @@ -import os -import json -import tempfile -import requests - -try: - import pytesseract -except Exception as e: - print("Skipping pytesseract import: %s" % e) - -try: - import PyPDF2 -except Exception as e: - print("Skipping PyPDF2 import: %s" % e) - -try: - from pdf2image import convert_from_path -except Exception as e: - print("Skipping pdf2image import: %s" % e) - - -try: - import llama_cpp -except Exception as e: - print("Skipping llama_cpp import: %s" % e) - -print("LD Library: '%s'" % os.environ.get("LD_LIBRARY_PATH", "")) - -from shuffle_sdk import AppBase - -#model = "/models/Llama-3.2-3B.Q8_0.gguf" # Larger -#model = "/models/Llama-3.2-3B.Q2_K.gguf" # Smol - -#model = "/models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf" # Smaller -#model = "/models/Meta-Llama-3-8B.Q6_K.gguf" -model = "/models/DeepSeek-R1-Distill-Llama.gguf" -if os.getenv("MODEL_PATH"): - model = 
os.getenv("MODEL_PATH") - -def load_llm_model(model): - print("Using model path '%s'" % model) - if not os.path.exists(model): - print("Could not find model at path %s" % model) - model_name = model.split("/")[-1] - # Check $HOME/downloads/{model} - - home_path = os.path.expanduser("~") - print(home_path) - - if os.path.exists(f"{home_path}/downloads/{model_name}"): - model = f"{home_path}/downloads/{model_name}" - else: - return { - "success": False, - "reason": "Model not found at path %s" % model, - "details": "Ensure the model path is correct" - } - - # Check for GPU layers - innerllm = None - gpu_layers = os.getenv("GPU_LAYERS") - if gpu_layers: - print("GPU Layers: %s" % gpu_layers) - - gpu_layers = int(gpu_layers) - if gpu_layers > 0: - innerllm = llama_cpp.Llama(model_path=model, n_gpu_layers=gpu_layers) - else: - innerllm = llama_cpp.Llama(model_path=model, n_gpu_layers=8) - else: - # Check if GPU available - print("No GPU layers set.") - #innerllm = llama_cpp.Llama(model_path=model) - - return { - "success": False, - "reason": "GPU layers not set", - "details": "Set GPU_LAYERS environment variable to the number of GPU layers to use (e.g. 8)." - } - - return innerllm - -try: - llm = load_llm_model(model) -except Exception as e: - print("[ERROR] Failed to load LLM model: %s" % e) - llm = { - "success": False, - "reason": "Failed to load LLM model %s" % model, - } - -class ShuffleAI(AppBase): - __version__ = "1.0.0" - app_name = "Shuffle AI" - - def __init__(self, redis, logger, console_logger=None): - super().__init__(redis, logger, console_logger) - - def run_llm(self, input, system_message=""): - global llm - global model - - self.logger.info("[DEBUG] LD LIbrary: '%s'. If this is empty, GPU's may not work." % os.environ.get("LD_LIBRARY_PATH", "")) - - if not system_message: - system_message = "Answer their question directly. Don't use HTML or Markdown", - - self.logger.info("[DEBUG] Running LLM with model '%s'. To overwrite path, use environment variable MODEL_PATH=" % model) - - # Check if llm is a dict or not and look for success and reason in it - if not llm: - return { - "success": False, - "reason": "LLM model not loaded", - "details": "Ensure the LLM model is loaded", - "gpu_layers": os.getenv("GPU_LAYERS"), - } - - if isinstance(llm, dict): - if "success" in llm and not llm["success"]: - # List files in /model folder - llm["folder"] = os.listdir("/models") - llm["gpu_layers"] = os.getenv("GPU_LAYERS") - return llm - - self.logger.info("[DEBUG] Running LLM with input '%s' and system message '%s'. GPU Layers: %s" % (input, system_message, os.getenv("GPU_LAYERS"))) - - # https://github.com/abetlen/llama-cpp-python - try: - print("LLM: ", llm) - - self.logger.info("[DEBUG] LLM: %s" % llm) - output = llm.create_chat_completion( - max_tokens=100, - messages = [ - { - "role": "system", - "content": system_message, - }, - { - "role": "user", - "content": input, - } - ] - ) - except Exception as e: - return { - "success": False, - "reason": f"Failed to run local LLM. 
Check logs in this execution for more info: {self.current_execution_id}", - "details": f"{e}" - } - - self.logger.info("[DEBUG] LLM output: %s" % output) - - new_message = "" - if "choices" in output and len(output["choices"]) > 0: - new_message = output["choices"][0]["message"]["content"] - - parsed_output = { - "success": True, - "model": output["model"], - "output": new_message, - } - - if "tokens" in output: - parsed_output["tokens"] = output["tokens"] - - if "usage" in output: - parsed_output["tokens"] = output["usage"] - - if not os.getenv("GPU_LAYERS"): - parsed_output["debug"] = "GPU_LAYERS not set. Running on CPU. Set GPU_LAYERS to the number of GPU layers to use (e.g. 8)." - - return parsed_output - - def security_assistant(self): - # Currently testing outside the Shuffle environment - # using assistants and local LLMs - - return "Not implemented" - - def shuffle_cloud_inference(self, apikey, text, formatting="auto"): - headers = { - "Authorization": "Bearer %s" % apikey, - } - - if not formatting: - formatting = "auto" - - output_formatting= "Format the following data to be a good email that can be sent to customers. Don't make it too business sounding." - if formatting != "auto": - output_formatting = formatting - - ret = requests.post( - "https://shuffler.io/api/v1/conversation", - json={ - "query": text, - "formatting": output_formatting, - "output_format": "formatting" - }, - headers=headers, - ) - - if ret.status_code != 200: - print(ret.text) - return { - "success": False, - "reason": "Status code for auto-formatter is not 200" - } - - return ret.text - - def autoformat_text(self, apikey, text, formatting="auto"): - headers = { - "Authorization": "Bearer %s" % apikey, - } - - if not formatting: - formatting = "auto" - - output_formatting= "Format the following data to be a good email that can be sent to customers. Don't make it too business sounding." - if formatting != "auto": - output_formatting = formatting - - ret = requests.post( - "https://shuffler.io/api/v1/conversation", - json={ - "query": text, - "formatting": output_formatting, - "output_format": "formatting" - }, - headers=headers, - ) - - if ret.status_code != 200: - print(ret.text) - return { - "success": False, - "reason": "Status code for auto-formatter is not 200" - } - - return ret.text - - def generate_report(self, apikey, input_data, report_title, report_name="generated_report.html"): - headers = { - "Authorization": "Bearer %s" % apikey, - } - - if not report_name: - report_name = "generated_report.html" - - if "." in report_name and not ".html" in report_name: - report_name = report_name.split(".")[0] - - if not "html" in report_name: - report_name = report_name + ".html" - - report_name = report_name.replace(" ", "_", -1) - output_formatting= "Format the following text into an HTML report with relevant graphs and tables. Title of the report should be {report_title}." 
- ret = requests.post( - "https://shuffler.io/api/v1/conversation", - json={ - "query": text, - "formatting": output_formatting, - "output_format": "formatting" - }, - headers=headers, - ) - - if ret.status_code != 200: - print(ret.text) - return { - "success": False, - "reason": "Status code for auto-formatter is not 200" - } - - # Make it into a shuffle file with self.set_files() - new_file = { - "name": report_name, - "data": ret.text, - } - - retdata = self.set_files([new_file]) - if retdata["success"]: - return retdata - - return { - "success": False, - "reason": "Failed to upload file" - } - - - def extract_text_from_pdf(self, file_id): - def extract_pdf_text(pdf_path): - with open(pdf_path, 'rb') as file: - pdf_reader = PyPDF2.PdfReader(file) - text = '' - for page in pdf_reader.pages: - text += page.extract_text() - - return text - - def extract_text_from_images(images): - text = '' - for image in images: - extracted_text = pytesseract.image_to_string(image, lang='eng') - text += extracted_text - return text - - def extract_text_from_pdf_with_images(pdf_path): - images = convert_from_path(pdf_path) - return extract_text_from_images(images) - - def export_text_to_json(image_text, extracted_text): - data = { - "success": True, - 'image_text': image_text, - 'extracted_text': extracted_text, - } - - #with open(output_path, 'w+') as file: - # json.dump(data, file, indent=4) - - return data - - pdf_data = self.get_file(file_id) - defaultdata = { - "success": False, - "file_id": file_id, - "filename": pdf_data["filename"], - "reason": "Something failed in reading and parsing the pdf. See error logs for more info", - } - - # Check type of pdf_data["data"] - if not isinstance(pdf_data["data"], bytes): - self.logger.info("Encoding data to bytes for the bytestream reader") - pdf_data["data"] = pdf_data["data"].encode() - - # Make a tempfile for the file data from self.get_file - # Make a tempfile with tempfile library - with tempfile.NamedTemporaryFile() as temp: - # Write the file data to the tempfile - # Get the path to the tempfile - temp.write(pdf_data["data"]) - pdf_path = temp.name - - # Extract text from the PDF - extracted_text_from_pdf = extract_pdf_text(pdf_path) - - # Extract text from the PDF using images - extracted_text_from_images = extract_text_from_pdf_with_images(pdf_path) - - # Combine the extracted text - - # Export combined text to JSON - #output_path = pdf_path.split(".")[0] + ".json" - exported_text = export_text_to_json(extracted_text_from_images, extracted_text_from_pdf) - exported_text["file_id"] = file_id - exported_text["filename"] = pdf_data["filename"] - return exported_text - - return defaultdata - - def extract_text_from_image(self, file_id): - # Check if it's a pdf - - pdf_data = self.get_file(file_id) - if "filename" not in pdf_data: - available_fields = [] - for key, value in pdf_data.items(): - available_fields.append(key) - - return { - "success": False, - "reason": "File not found", - "details": f"Available fields: {available_fields}", - } - - # If it is, use extract_text_from_pdf - # If it's not, use pytesseract - if pdf_data["filename"].endswith(".pdf"): - return self.extract_text_from_pdf(file_id) - - defaultdata = { - "success": False, - "file_id": file_id, - "filename": pdf_data["filename"], - "reason": "Something failed in reading and parsing the pdf. 
See error logs for more info", - } - - with tempfile.NamedTemporaryFile() as temp: - # Load temp as Image - # Write the file data to the tempfile - # Get the path to the tempfile - temp.write(pdf_data["data"]) - pdf_path = temp.name - - image = Image.open(temp.name) - image = image.resize((500,300)) - custom_config = r'-l eng --oem 3 --psm 6' - text = pytesseract.image_to_string(image,config=custom_config) - - data = { - "success": True, - 'extracted_text': text, - } - - return data - - return defaultdata - - def transcribe_audio(self, file_id): - return { - "success": False, - "reason": "Not implemented yet" - } - - def find_image_objects(self, file_id): - return { - "success": False, - "reason": "Not implemented yet" - } - - def gpt(self, input_text): - return { - "success": False, - "reason": "Not implemented yet" - } - - def run_agent(self, input_data, actions=None, apps=None): - prepared_format = { - "id": self.action["id"], - "params": { - "tool_name": self.action["app_name"], - "tool_id": self.action["app_id"], - "environment": self.action["environment"], - "input": { - "text": input_data, - } - }, - } - - if actions: - prepared_format["params"]["tool_name"] = actions - - if apps: - pass - - baseurl = f"{self.url}/api/v1/agent?execution_id={self.current_execution_id}&authorization={self.authorization}&action_id={self.action['id']}" - self.logger.info("[DEBUG] Running agent action with URL '%s'" % (baseurl)) - - headers = {} - request = requests.post( - baseurl, - json=prepared_format, - headers=headers, - ) - - # Random sleep timer to force delay - time.sleep(2) - # Gets into waiting state on backend - return json.dumps({ - "app_run": True, - "input_prompt": prepared_format, - "status": request.status_code, - "body": request.text, - }) - - def run_schemaless(self, category, action, app_name="", fields=""): - self.logger.info("[DEBUG] Running schemaless action with category '%s' and action label '%s'" % (category, action)) - - # Not necessary anymore - """ - action := shuffle.CategoryAction{ - Label: step.Name, - Category: step.Category, - AppName: step.AppName, - Fields: step.Fields, - - Environment: step.Environment, - - SkipWorkflow: true, - } - """ - - data = { - "label": action, - "category": category, - - "app_name": "", - "fields": [], - - "skip_workflow": True, - } - - if app_name: - data["app_name"] = app_name - - if fields: - if isinstance(fields, list): - data["fields"] = fields - - elif isinstance(fields, dict): - for key, value in fields.items(): - parsedvalue = str(value) - try: - if str(value).startswith("{") or str(value).startswith("["): - parsedvalue = json.dumps(value) - except: - pass - - data["fields"].append({ - "key": key, - "value": parsedvalue, - }) - - else: - fields = str(fields).strip() - # Valid format: - # {"field1": "value1", "field2": "value2"} - # field1=value1&field2=value2 - # field1:value1\nfield2:value2 - - cursplit = None - if "\\n" in fields and not fields.startswith("{") and not fields.startswith("["): - cursplit = "\\n" - elif ("=" in fields or ":" in fields) and not fields.startswith("{") and not fields.startswith("["): - cursplit = "&" - - if cursplit: - newfields = [] - for line in fields.split(cursplit): - splitkey = None - if "=" in line: - splitkey = "=" - elif ":" in line: - splitkey = ":" - - if splitkey: - parts = line.split(splitkey, 1) - newfields.append({ - "key": parts[0].strip(), - "value": splitkey.join(parts[1:]).strip(), - }) - - data["fields"] = newfields - else: - if not fields.startswith("{") and not fields.startswith("["): 
- fields = json.dumps({ - "data": fields, - }) - - try: - loadedfields = json.loads(fields) - for key, value in loadedfields.items(): - data["fields"].append({ - "key": key, - "value": value, - }) - - except Exception as e: - self.logger.info("[ERROR] Failed to load fields as JSON: %s" % e) - return json.dumps({ - "success": False, - "reason": "Ensure 'Fields' are valid JSON", - "details": "%s" % e, - }) - - #baseurl = "%s/api/v1/apps/categories/run" % self.base_url - baseurl = "%s/api/v1/apps/categories/run" % self.url - baseurl += "?execution_id=%s&authorization=%s" % (self.current_execution_id, self.authorization) - - self.logger.info("[DEBUG] Running schemaless action with URL '%s', category %s and action label %s" % (baseurl, category, action)) - - headers = {} - request = requests.post( - baseurl, - json=data, - headers=headers, - ) - - try: - if "parameters" in self.action: - response_headers = request.headers - for key, value in response_headers.items(): - if not str(key).lower().endswith("-url"): - continue - - self.action["parameters"].append({ - "name": key, - "value": value, - }) - - #self.logger.info("[DEBUG] Response header: %s: %s" % (key, value)) - except Exception as e: - self.logger.info("[ERROR] Failed to get response headers (category action url debug mapping): %s" % e) - - try: - data = request.json() - - #if "success" in data and "result" in data and "errors" in data: - # return data["result"] - - return data - except: - return request.text - -if __name__ == "__main__": - ShuffleAI.run() diff --git a/shuffle-ai/1.0.0/upload.sh b/shuffle-ai/1.0.0/upload.sh deleted file mode 100755 index b449aa4b..00000000 --- a/shuffle-ai/1.0.0/upload.sh +++ /dev/null @@ -1,16 +0,0 @@ -gcloud config set project shuffler - -gcloud beta run deploy shuffle-ai-1-0-0 \ - --project=shuffler \ - --region=europe-west4 \ - --source=./ \ - --max-instances=1 \ - --concurrency=64 \ - --gpu 1 --gpu-type=nvidia-l4 \ - --cpu 4 \ - --memory=16Gi \ - --no-cpu-throttling \ - --set-env-vars=MODEL_PATH=/models/DeepSeek-R1-Distill-Llama.gguf,GPU_LAYERS=64,SHUFFLE_APP_EXPOSED_PORT=8080,SHUFFLE_SWARM_CONFIG=run,SHUFFLE_LOGS_DISABLED=true,SHUFFLE_APP_SDK_TIMEOUT=300,LD_LIBRARY_PATH=/usr/local/lib:/usr/local/nvidia/lib64:$LD_LIBRARY_PATH \ - --source=./ \ - --service-account=shuffle-apps@shuffler.iam.gserviceaccount.com \ - --timeout=120s From e02aaadf983f7e610610b6f5e96865ce6a9cd1cb Mon Sep 17 00:00:00 2001 From: Frikky Date: Tue, 10 Feb 2026 13:04:23 +0100 Subject: [PATCH 13/15] Re-added shuffle-ai --- shuffle-ai/1.1.0/Dockerfile | 54 +++ shuffle-ai/1.1.0/Dockerfile_GPU | 108 ++++++ shuffle-ai/1.1.0/api.yaml | 167 ++++++++ shuffle-ai/1.1.0/requirements.txt | 5 + shuffle-ai/1.1.0/src/app.py | 609 ++++++++++++++++++++++++++++++ shuffle-ai/1.1.0/upload.sh | 16 + 6 files changed, 959 insertions(+) create mode 100644 shuffle-ai/1.1.0/Dockerfile create mode 100644 shuffle-ai/1.1.0/Dockerfile_GPU create mode 100644 shuffle-ai/1.1.0/api.yaml create mode 100644 shuffle-ai/1.1.0/requirements.txt create mode 100644 shuffle-ai/1.1.0/src/app.py create mode 100755 shuffle-ai/1.1.0/upload.sh diff --git a/shuffle-ai/1.1.0/Dockerfile b/shuffle-ai/1.1.0/Dockerfile new file mode 100644 index 00000000..9b059f27 --- /dev/null +++ b/shuffle-ai/1.1.0/Dockerfile @@ -0,0 +1,54 @@ +FROM python:3.10-alpine + +# Install all alpine build tools needed for our pip installs +#RUN apk --no-cache add --update alpine-sdk libffi libffi-dev musl-dev openssl-dev git poppler-utils + +# Install all of our pip packages in a single directory that we 
can copy to our base image later +RUN mkdir /install +WORKDIR /install + +# Switch back to our base image and copy in all of our built packages and source code +#COPY --from=builder /install /usr/local +COPY src /app +COPY requirements.txt /requirements.txt +RUN python3 -m pip install -r /requirements.txt + +# Install any binary dependencies needed in our final image +# RUN apk --no-cache add --update my_binary_dependency +RUN apk --no-cache add jq git curl + +ENV SHELL=/bin/bash + +### Install Tesseract +ENV CC /usr/bin/clang +ENV CXX /usr/bin/clang++ +ENV LANG=C.UTF-8 +ENV TESSDATA_PREFIX=/usr/local/share/tessdata + +# Dev tools +WORKDIR /tmp +RUN apk update +RUN apk upgrade +RUN apk add file openssl openssl-dev bash tini leptonica-dev openjpeg-dev tiff-dev libpng-dev zlib-dev libgcc mupdf-dev jbig2dec-dev +RUN apk add freetype-dev openblas-dev ffmpeg-dev linux-headers aspell-dev aspell-en # enchant-dev jasper-dev +RUN apk add --virtual .dev-deps git clang clang-dev g++ make automake autoconf libtool pkgconfig cmake ninja +RUN apk add --virtual .dev-testing-deps -X http://dl-3.alpinelinux.org/alpine/edge/testing autoconf-archive +RUN ln -s /usr/include/locale.h /usr/include/xlocale.h + +RUN apk add tesseract-ocr +RUN apk add poppler-utils + +# Install from main +RUN mkdir /usr/local/share/tessdata +RUN mkdir src +RUN cd src +RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata +RUN git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git + +#RUN curl -fsSL https://ollama.com/install.sh | sh +#RUN ollama pull llama3.2 +#RUN cd tesseract && ./autogen.sh && ./configure --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl && make && make install && cd /tmp/src + +# Finally, lets run our app! +WORKDIR /app +CMD ["python", "app.py", "--log-level", "DEBUG"] diff --git a/shuffle-ai/1.1.0/Dockerfile_GPU b/shuffle-ai/1.1.0/Dockerfile_GPU new file mode 100644 index 00000000..ed2798c2 --- /dev/null +++ b/shuffle-ai/1.1.0/Dockerfile_GPU @@ -0,0 +1,108 @@ +FROM python:3.10.18-slim + +# Switch back to our base image and copy in all of our built packages and source code +COPY requirements.txt /requirements.txt +# Check if requirements.txt contains llama-cpp-python or not +RUN grep -q "^llama-cpp-python" /requirements.txt \ + || (echo "❌ requirements.txt missing llama-cpp-python" && exit 1) + +### Install Tesseract +ENV SHELL=/bin/bash +ENV CC /usr/bin/clang +ENV CXX /usr/bin/clang++ +ENV LANG=C.UTF-8 +ENV TESSDATA_PREFIX=/usr/local/share/tessdata + +# Install all build tools needed for our pip installs +RUN apt update +RUN apt install -y clang g++ make automake autoconf libtool cmake + +## Install the same packages with apt as with apk, but ensure they exist in apt +RUN apt install -y jq git curl +RUN apt install -y file openssl bash tini libpng-dev aspell-en +RUN apt install -y git clang g++ make automake autoconf libtool cmake +RUN apt install -y autoconf-archive wget + +# Install cuda toolkit +#RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-archive-keyring.gpg +#RUN dpkg -i cuda-archive-keyring.gpg +#RUN rm cuda-archive-keyring.gpg +#RUN apt update +#RUN apt install -y cuda +#RUN echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc +#RUN echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc +#RUN source ~/.bashrc + +# Larger model +RUN mkdir -p /models + +# Fails. 6 bit, 8B model. 
+#RUN wget https://huggingface.co/RichardErkhov/meta-llama_-_Meta-Llama-3-8B-gguf/blob/main/Meta-Llama-3-8B.Q6_K.gguf?download=true -O /models/Meta-Llama-3-8B.Q6_K.gguf +#ENV MODEL_PATH="/models/Meta-Llama-3-8B.Q6_K.gguf" + +# Simple small Llama wrapper +RUN wget https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf?download=true -O /models/DeepSeek-R1-Distill-Llama.gguf +# Larger one +#RUN wget https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf?download=true -O /models/DeepSeek-R1-Distill-Llama.gguf +ENV MODEL_PATH="/models/DeepSeek-R1-Distill-Llama.gguf" + +# Failing? Bad magic bytes. +#RUN wget https://huggingface.co/QuantFactory/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B.Q2_K.gguf?download=true -O /models/Llama-3.2-3B.Q2_K.gguf + + +# Install all of our pip packages in a single directory that we can copy to our base image later +RUN mkdir /install +WORKDIR /install + +RUN python3 -m pip install -r /requirements.txt +RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install llama-cpp-python --upgrade --force-reinstall --no-cache-dir + + +# Install any binary dependencies needed in our final image + + +# Dev tools +WORKDIR /tmp +#RUN apk update +#RUN apk upgrade + + +RUN ln -s /usr/include/locale.h /usr/include/xlocale.h + +#RUN apk add tesseract-ocr +RUN apt install -y tesseract-ocr +#RUN apk add poppler-utils +RUN apt install -y poppler-utils +RUN apt clean && rm -rf /var/lib/apt/lists/* + +# Install from main +RUN mkdir /usr/local/share/tessdata +RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata + +RUN mkdir src +RUN cd src + +RUN git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git + +#RUN curl -fsSL https://ollama.com/install.sh | sh +# Install to /usr/local +#RUN wget https://ollama.com/install.sh -O /usr/local/bin/ollama-install +#RUN chmod +x /usr/local/bin/ollama-install +#RUN sh /usr/local/bin/ollama-install +# +#RUN ls -alh /usr/bin +#RUN ollama serve & sleep 2 && ollama pull nezahatkorkmaz/deepseek-v3 +#CMD ["sh", "-c", "ollama serve & sleep 2 && python app.py --log-level DEBUG"] + +#RUN wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf +RUN python3 -m pip install ctransformers --no-binary ctransformers + +# Finally, lets run our app! +ENV GIN_MODE=release +ENV SHUFFLE_APP_SDK_TIMEOUT=300 +#ENV LD_LIBRARY_PATH=/usr/local/lib/python3.10/site-packages/ctransformers/lib/basic/libctransformers.so +#RUN chmod 755 /usr/local/lib/python3.10/site-packages/ctransformers/lib/basic/libctransformers.so + +COPY src /app +WORKDIR /app +CMD ["python", "app.py", "--log-level", "DEBUG"] diff --git a/shuffle-ai/1.1.0/api.yaml b/shuffle-ai/1.1.0/api.yaml new file mode 100644 index 00000000..ab3f6243 --- /dev/null +++ b/shuffle-ai/1.1.0/api.yaml @@ -0,0 +1,167 @@ +--- +app_version: 1.1.0 +name: Shuffle AI +description: An EXPERIMENTAL AI tool app for Shuffle +tags: + - AI + - Shuffle + - LLM +categories: + - AI + - LLM + - Shuffle +contact_info: + name: "@frikkylikeme" + url: https://shuffler.io + email: support@shuffler.io +actions: + #- name: run_llm + # description: "Runs a local LLM, with a GPU or CPU (slow). 
Default model is set up in Dockerfile" + # parameters: + # - name: input + # description: "The input question to the model" + # required: true + # multiline: true + # example: "" + # schema: + # type: string + # - name: system_message + # description: "The system message use, if any" + # required: false + # multiline: false + # example: "" + # schema: + # type: string + + - name: shuffle_cloud_inference + description: Input ANY kind of data in the format you want, and the format you want it in. Default is a business-y email. Uses ShuffleGPT, which is based on OpenAI and our own model. + parameters: + - name: apikey + description: Your https://shuffler.io apikey + required: true + multiline: false + example: "" + schema: + type: string + - name: text + description: The text you want to be converted (ANY format) + required: true + multiline: true + example: "Bad IPs are 1.2.3.4 and there's no good way to format this. JSON works too!" + schema: + type: string + - name: formatting + description: The format to use. + required: false + multiline: true + example: "Make it work as a ticket we can put in service now that is human readable for security analysts" + schema: + type: string + returns: + schema: + type: string + - name: generate_report + description: Input ANY kind of data in the format you want, and it will make an HTML report for you. This can be downloaded from the File location. + parameters: + - name: apikey + description: Your https://shuffler.io apikey + required: true + multiline: false + example: "" + schema: + type: string + - name: input_data + description: The text you want to be converted (ANY format) + required: true + multiline: true + example: "Bad IPs are 1.2.3.4 and there's no good way to format this. JSON works too!" + schema: + type: string + - name: report_title + description: The report title to be used in the report + required: true + multiline: true + example: "Statistics for October" + schema: + type: string + - name: report_name + description: The name of the HTML file + required: false + multiline: true + example: "statistics.html" + schema: + type: string + returns: + schema: + type: string + - name: extract_text_from_pdf + description: Returns text from a pdf + parameters: + - name: file_id + description: The file to find text in + required: true + multiline: false + example: "file_" + schema: + type: string + returns: + schema: + type: string + - name: extract_text_from_image + description: Returns text from an image + parameters: + - name: file_id + description: The file to find text in + required: true + multiline: false + example: "file_" + schema: + type: string + returns: + schema: + type: string + - name: run_schemaless + description: Runs an automatically translated action + parameters: + - name: category + description: The category the action is in + required: true + multiline: false + schema: + type: string + - name: action + description: The action label to run + required: true + multiline: false + schema: + type: string + - name: app_name + description: The app to run the action in + required: false + multiline: false + schema: + type: string + - name: fields + description: The additional fields to add + required: false + multiline: false + schema: + type: string + returns: + schema: + type: string + - name: transcribe_audio + description: Returns text from audio + parameters: + - name: file_id + description: The file containing the audio + required: true + multiline: false + example: "file_" + schema: + type: string + returns: + schema: + type: 
string + +large_image: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAK4AAACuCAYAAACvDDbuAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAABmJLR0QA/wD/AP+gvaeTAAAAB3RJTUUH5AgXDjM6hEZGWwAAD+lJREFUeNrtXb/vJTcRH7/v3iVBCqRBiCAQAtHwq4AWRElHwX8AoqbmXwDRpiH/QyQkGoogUSAhKIKUAE1IdSRSREhQQk7c3XtD8X55vePxjNfe3bk3H+nu+96uPf54POtnj8fe8OQX30JwOIxhtzYBh6MGOsPF0z9p2iWwpd8LjX6W5vWUYaiqlBuvLT5b5TQDPlRwmMSAABBg+kCer+XuAeQf4tL9tAxJ/hIfZGSm8rhyEfjytfxr9FeSX+KjvVfipNVpWlaPNhsAEPCS7Ao8FYnRlbO4ksLnjiSQvIanv4FNjwJ5pXIlMq6MQpIqqPnQKQKbjuPDtZlG55o6UHXWtVncZZTbbNBVB1P5dJYguCbJJ1WjOG8PVOioSm5HPrVt1rwuyN+K+PSZnNV1M/MmEFubfFjjU9tmK9XBJ2cOk3DDdZiEG67DJOrGuA7HyvAe12ESAxa73KPrN1z8gUikCCdvcD5NXnpQpA8nNhh9m5Yn4ZMrV8dHV/8a/dRA0x419a3lI9GBtM2GcrGYFXRNUU5TyluTOpdXwqeUt6YOpby9DUTLZylOcRlzdBTf2yV3ZBFOmKSHQh5KpjSSSpqG4s6VkUubqw8W8knTSnWk0Y+2jF5tlmuDUloJn6T8gRVcEpJ+3srChHSNt8RJsq4p+S41LC13KTcu/RJt1pLPKY1Pzhwm4YbrMAk3XIdJTMe4aeCjJhBVk0YiQ1MWZHhLgmO5QNVWfKRlavlIIQnurQmcnaMjSbBxhtMwYUxODpLcl2tUhvPlNE6VkiuoFVLXKT6ZfBjxRIIzOSlgWpLSB8uZ0g3BjeVDlFGEos0mfKKL7CQrY2ES7pM2i/OX22w4/sWReEhEnUOTxx3a+FrawQGZh04/rWe6oJBKo5zT4zLjPHE9ZHym5YzToogzfQcmfLgOhuLF/Sjm2izVDyXnrKtcmmmdaKumf+RyCw5Xn7OmzQaJF0fiEZG6BjXpYUYaSVkaPrXeHe4eVaZEr3Prqrmmrbc2T8lrmOMjn5xJHeJLYkk+PfzNTxOflrwF0EeHbU0Zt2wsW+PTkncB7g5zmMSwzUfS4eDhPa7DJK5jXGorsnZxonbRIbeAoOUjkUvlp+qxFp9YNuWL0nBqsVCkqUsrHQnuX+Nx5/qcJDI0kWgtJh7ihYCN8aG+13DqOXlbWUfD+fN0AUEmp3RcUWlVEwCynb5ssYLnxHViJT6ULCykb8EnzUfpqBWfVAdcnt5tprGhIe10WnjHpB2FtMPWcpM66yXyOad4Lz4Srq34SHhwZfRos1w9Y/jkzGESvj3dYRLe4zpMwg3XYRJuuA6T4M/Hzfk/OGd9OP2HOE2f8wtBlCebJrkfp+Gc3AGmiSiuaVlpwkmajL4osPUm9FMqIzBOJolfjGuzEtdUwWl53Dm7Eh9pzIdps+FiYJyi1N+Rvs/6OLCQBul8Ip8R08ik3EwhLZz1Wv8XmU7ZZqX7OT2gUIB2oaRBm+2ovDm5nM+ulEeiD8yka8UnJ1PCP82r9YWW8iCU5XO8W/PhPmvllNKW7lEyszsgNKuzkspJFZFL15uPtIweq7A1xiKpz1J8tGXP+dE53/fJmcMk6hcgJO8XqokEKi5uYzTG29LqSev95JqyKsoOOxjNpKQBD7VFc5GBJRsi+NQHkkv6+7m/UxTufwLCCy+CbAruyOLDdwEf/uf6vbbNJukzlogZC6wMdhAcM7ohHPawe/GrcO+HPwe4u782G7sIAE9++0vYv/YKwO6usfCaka0etgwXAGB3D8JznwIYnlmbiW0M92FbQy0d+MmZ3Xo5JDDcvuXJ2ZYqtyUuTwuM6nSXctcufHCOZqkjPScXhbIcdeD0XUpfKyNNy8nlyhuozLkM8XxR6pjm7tc4Fdx620I7lWq10JCm0ZanWoBwm3FsBe1WznpadbTg4A9PI2xx7FUKHopQjg7TKqNnpbioIUcFUGUsy1CS8fFYBYdJuOE6TMIN12ESgyiKiwO1bQOJe1w+6p42Etmhwmi6kLZXfC2G9IUj2vulY2wIPrv4onRhIXcRqS0DiWxkhF0uIb37wG22LRCSuVCyekC2GSXj9CG3YyT+krWh+KPAhkTvgGDKqbqnWbBwY+2Pnm3Wy4aMRYc1MuPDvp0skwgAh8PaJGbh5k4kx0f/hce/ewnw/QenXQCTFJDfQy45PzFNn5NHsoPy/u6gzE+nObzz91P9Z+6kWAm2zg6bDMoq8OQxHN78Axze/htAaB1EbQhhdzyfgRqIGoCxoUIjhDuA3ZDpcR0W4C3nMInbNVw7v4oOAsehArVFPL0uOjMM+DlM+pk7t7/BDuwcJsM6gcM7WweOX05nFCHNi12ASRfLo3QaX9O0GWTylOTnZIMwf4YPPTlD4iMm7aZwAGOUf3Rf48wjHNzVOMkKFA8pp0RHZ1mjdihs5R61PWbsWlphgs/E5gptNvFfSLY8QPk7dVbh+UNg8qfnJsZ8Bo0hzF0Y2Nqvc0s+Vbs5YL5OLfPRcorT2hvjtuxyHWZhzHCX6AMcFtB2B0RvtKZqqe6OEYz1uA7HEbdruN7ZmsZtGq4brXnQhlsbLFkDrY9mC9giH41/dSlONfeEIBcgss7nXopInPdkYN95J3XD1bMgkJUNFOxsDNLgyiynhYyX5dnAhnLyhzmO4V7IO8+xyZEgx5UqvJ41rOUTdhBOr2w6KjZc+B1FBkLGVUoAABQEcmPu6rPPw73v/gh2n/wMANYEhAd4/NqvYf/Wn5pEyPW2IUrOzQWSHyHdkEJgN8D97/0Edp/7GgDu9fnDDvD9t+HRqy8BPvxQ9i6xEXUEuPcMDF//Puw+/aVqDewfvA77f/zx9M40e7jNeNw5CDu4++K34e4r36kWcXj3TYDfvwz8D79ml1clDPuxx9FhuUik0rblVihFWLX+7ZFEXE2ioLBNg9fUSRopVsOjJbioskZlDuyAvmflpOWsOUNu/cBQ8jW/1A0np11RG+GjwG36cQHqFWnBcG4Axgx37d/I1uXXcvCnx6BXoQXf3mOAzvVpooJzaOcWdKBH1fZ07dCsFZpNgmfZbaOJ2dxnpwkNFC3C9MBcGxo0OugxwV8LWKm5lg9sFQdszKGhLAla2dCuduuOZcypx+UXdk0OK5e/hXKNTc4cjiPGhtvTX1njI6Z2+vbuKtaKspLooXdkXs1u5yUR7/LdROMsraSSIfTa6pqWodE9Mvla6sCI8d7uUMEXIEzjdg3XYRr2osOePIbDR+9BGO7re78QAD/+AODwpK5sBDg6dGyGAtL1sYnLGDe3+2BNTNycYQf7B2/Aw5d/XB9HejjA4YN3jgHUNQ132MOTv/wG9v
98A+CgFBCO/+FH/wJ89PBaSY1OULZzQyQL2skayVwg/7Dk3Ky2IlcEgEcfw/7dt+YJnRP1f9jDoz+/AvM0FU4c1u8mes59e+ZXDhXmPE+tForD+lH73Q6EluiozfaldnzWQUWQzdprPk87lg44nkTKN+DT/10S7lW4VYz8wWucOTAPtl5e4mgfjmu0/b3HdZiEG67DJNxwbxlGhwkAuZeXAJS3Qpfemq7dds1tS5dsbc6dAyQpS5uGe+lKrJLSGUqlCb2GcwUuCxBzt71T2/g7t9mQniofv0yjWOtMYdSLM6Sy0pd5iLdFSQtUyiJtRnjmGOdhqq5bo5WzUXAYzns2Lu2tjaqb0WaTHRBrR9cvEVG4VF3WkLsGnzXqohzjbk3dt4hG/jDDxy8BLL5y5miBZi1wa9vT14dJ0o2qft6/1GhQZ1SV9uJxd3cQ7j+XD7RJ40JK38/XAPKz4ly+OG+KwOTDwn0uDSKEZ58/vgH+hmHLcA97uPvCN+G5H/wMoCaQ/KkAAtzdg/DCZ9cmsipsGS4ce5u7z38DYHhmbTL2YfjBH28DOM80s+MoxllVvfkwKudSbiL0dB0NTya2iGpNYmIzl+/EdexjQ8PEGE4FhdPHMAlbLhcsdWaPnfDEAxQJnbx53TEPJ51j3N7CrEfbSNt+arzXt57X2RBx94LsUGHOGRQtF7Fa8HFQQOabJmc5XQ8b8iAbh0mYNFzvdefD+nRhyPowqWitc2VbRyutGCF18+ilU2mEXWX51zFuKbqlZ/RLy0gixzagiS6sgL2hghuwAywarsMBxgzXO9u2sBzZWHwHRLwrQ5rWYQBIfuwCKnZJEpvEYSg9dRoncnejtdxFbBRLqFQzr5fSudH3nDmOaH26yHIwNcZ1NIZNmwWArYU1Fg8HDLB/7wH879VfAey2Rd0a9g/+2ubUyZUOdAz//umXjT136GPd2cDNnM9bC4Pd1gbOx3WsDh/jOkzCDddhEpcjmKiFhvGLQwDitJNrYTz05H7MS+N56hiq0mbYCfeIj2STb2s+cSJEOrguJ4fScaneOW7kOWZJm4VCmaPFg8wKgcSGuLpzR49Rerm8vIRaaECgvyB1Tbl9qOZoMiykHeVhVoZKwW9N+CSJuPwsH4YY12aTa5TxYyZPpsxSDG/Rhgp1lyxUnK/7UMFhEm64DpNIlnzTAdXcsJml8rdO1yt/K+R45EJUluS9zHaWITuQJb9rsVT+HvuKe+RvhdIIcE3ey4Rj+VDBYRJuuA6TcMN1mMT15SWMZ5h10Oc86+dr50s14QWch7rEh5PHef+psgsyqB0iI2e+hE+pDlpvvkQ/uVUMDfdSnTq12TA58injFUdOMPB5AeiALtHcUrstXrqSINnaoVjxyE5ra1ZipHMsTV2kMiQ8NDw7tdmqQ4WtzNEd9uBjXIdJuOE6TMLoy0sct46KHndNS6d2pW5tp+rW+Jw5rVl2qpP5Oqrcnr52w9RMgbfA8db5tAsp8DGuwyTaGW6DB7ppn9CCzxKnvKz9Kz7j/prUi0cwqQLQDBtvrp5uvMc/Wf00oFAT5FjscbcwMloCt1LPWvTUT41sH+M6TMIN12ESw3UPd8gPtrh7JeTyXvZGn0KD0jSlMms5Sfhw92vkUvXT5tPWt3WbSfjMsSFl3ujlJdy+4xkjnFze+PWrNWXWclqaT6t82vq2bjMJnzk2pMzrQwWHSbjhOkzCDdchxpZchpezwySQvHhiyVMLevPRctXwqeWmfcv5GaVTGKRy557YIHnhpETeoCl05grhbPlL89HK1vCp5darvZbgo+XEwYcKDpNww3WYxC6/U5PY5oun66MzPHH8L05PpqHKghn+TpjyictkZQLPh4u6yeknvXeWU+JD6TDHJ/cbn93Bi8nnDKdJm8EG2+zIZwBudlbjUOYOpj1frClPwyf3OZuXuaEx3lgWZixKxIfZ911rvJO65PRFVmZjbYY+VHDYhBuuwyTccB0mcdkB0cr5z70pW/pm7Bo+LesgqUsrPjVye9WXkqld8FiizRCi6LBWjmTRPGGG/JZ5ejvoa1ai1qwvlWarbeZDBYdJuOE6TKKP4W7xJdFb4+R8ZvH5P852gxhpwOZ9AAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDIwLTA4LTIzVDE0OjUyOjAwKzAyOjAwetRgVgAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyMC0wOC0yM1QxNDo1MTo1OCswMjowMJuxI+oAAAAASUVORK5CYII= diff --git a/shuffle-ai/1.1.0/requirements.txt b/shuffle-ai/1.1.0/requirements.txt new file mode 100644 index 00000000..62bee6e9 --- /dev/null +++ b/shuffle-ai/1.1.0/requirements.txt @@ -0,0 +1,5 @@ +shuffle-sdk + +pytesseract +pdf2image +pypdf2 diff --git a/shuffle-ai/1.1.0/src/app.py b/shuffle-ai/1.1.0/src/app.py new file mode 100644 index 00000000..e5f5e9bc --- /dev/null +++ b/shuffle-ai/1.1.0/src/app.py @@ -0,0 +1,609 @@ +import os +import json +import tempfile +import requests + +try: + import pytesseract +except Exception as e: + print("Skipping pytesseract import: %s" % e) + +try: + import PyPDF2 +except Exception as e: + print("Skipping PyPDF2 import: %s" % e) + +try: + from pdf2image import convert_from_path +except Exception as e: + print("Skipping pdf2image import: %s" % e) + + +try: + import llama_cpp +except Exception as e: + print("Skipping llama_cpp import: %s" % e) + +print("LD Library: '%s'" % os.environ.get("LD_LIBRARY_PATH", "")) + +from shuffle_sdk import AppBase + +#model = "/models/Llama-3.2-3B.Q8_0.gguf" # Larger +#model = "/models/Llama-3.2-3B.Q2_K.gguf" # Smol + +#model = "/models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf" # Smaller +#model = "/models/Meta-Llama-3-8B.Q6_K.gguf" +model = "/models/DeepSeek-R1-Distill-Llama.gguf" +if 
os.getenv("MODEL_PATH"): + model = os.getenv("MODEL_PATH") + +def load_llm_model(model): + print("Using model path '%s'" % model) + if not os.path.exists(model): + print("Could not find model at path %s" % model) + model_name = model.split("/")[-1] + # Check $HOME/downloads/{model} + + home_path = os.path.expanduser("~") + print(home_path) + + if os.path.exists(f"{home_path}/downloads/{model_name}"): + model = f"{home_path}/downloads/{model_name}" + else: + return { + "success": False, + "reason": "Model not found at path %s" % model, + "details": "Ensure the model path is correct" + } + + # Check for GPU layers + innerllm = None + gpu_layers = os.getenv("GPU_LAYERS") + if gpu_layers: + print("GPU Layers: %s" % gpu_layers) + + gpu_layers = int(gpu_layers) + if gpu_layers > 0: + innerllm = llama_cpp.Llama(model_path=model, n_gpu_layers=gpu_layers) + else: + innerllm = llama_cpp.Llama(model_path=model, n_gpu_layers=8) + else: + # Check if GPU available + print("No GPU layers set.") + #innerllm = llama_cpp.Llama(model_path=model) + + return { + "success": False, + "reason": "GPU layers not set", + "details": "Set GPU_LAYERS environment variable to the number of GPU layers to use (e.g. 8)." + } + + return innerllm + +try: + llm = load_llm_model(model) +except Exception as e: + print("[ERROR] Failed to load LLM model: %s" % e) + llm = { + "success": False, + "reason": "Failed to load LLM model %s" % model, + } + +class ShuffleAI(AppBase): + __version__ = "1.0.0" + app_name = "Shuffle AI" + + def __init__(self, redis, logger, console_logger=None): + super().__init__(redis, logger, console_logger) + + def run_llm(self, input, system_message=""): + global llm + global model + + self.logger.info("[DEBUG] LD LIbrary: '%s'. If this is empty, GPU's may not work." % os.environ.get("LD_LIBRARY_PATH", "")) + + if not system_message: + system_message = "Answer their question directly. Don't use HTML or Markdown", + + self.logger.info("[DEBUG] Running LLM with model '%s'. To overwrite path, use environment variable MODEL_PATH=" % model) + + # Check if llm is a dict or not and look for success and reason in it + if not llm: + return { + "success": False, + "reason": "LLM model not loaded", + "details": "Ensure the LLM model is loaded", + "gpu_layers": os.getenv("GPU_LAYERS"), + } + + if isinstance(llm, dict): + if "success" in llm and not llm["success"]: + # List files in /model folder + llm["folder"] = os.listdir("/models") + llm["gpu_layers"] = os.getenv("GPU_LAYERS") + return llm + + self.logger.info("[DEBUG] Running LLM with input '%s' and system message '%s'. GPU Layers: %s" % (input, system_message, os.getenv("GPU_LAYERS"))) + + # https://github.com/abetlen/llama-cpp-python + try: + print("LLM: ", llm) + + self.logger.info("[DEBUG] LLM: %s" % llm) + output = llm.create_chat_completion( + max_tokens=100, + messages = [ + { + "role": "system", + "content": system_message, + }, + { + "role": "user", + "content": input, + } + ] + ) + except Exception as e: + return { + "success": False, + "reason": f"Failed to run local LLM. 
Check logs in this execution for more info: {self.current_execution_id}", + "details": f"{e}" + } + + self.logger.info("[DEBUG] LLM output: %s" % output) + + new_message = "" + if "choices" in output and len(output["choices"]) > 0: + new_message = output["choices"][0]["message"]["content"] + + parsed_output = { + "success": True, + "model": output["model"], + "output": new_message, + } + + if "tokens" in output: + parsed_output["tokens"] = output["tokens"] + + if "usage" in output: + parsed_output["tokens"] = output["usage"] + + if not os.getenv("GPU_LAYERS"): + parsed_output["debug"] = "GPU_LAYERS not set. Running on CPU. Set GPU_LAYERS to the number of GPU layers to use (e.g. 8)." + + return parsed_output + + def security_assistant(self): + # Currently testing outside the Shuffle environment + # using assistants and local LLMs + + return "Not implemented" + + def shuffle_cloud_inference(self, apikey, text, formatting="auto"): + headers = { + "Authorization": "Bearer %s" % apikey, + } + + if not formatting: + formatting = "auto" + + output_formatting= "Format the following data to be a good email that can be sent to customers. Don't make it too business sounding." + if formatting != "auto": + output_formatting = formatting + + ret = requests.post( + "https://shuffler.io/api/v1/conversation", + json={ + "query": text, + "formatting": output_formatting, + "output_format": "formatting" + }, + headers=headers, + ) + + if ret.status_code != 200: + print(ret.text) + return { + "success": False, + "reason": "Status code for auto-formatter is not 200" + } + + return ret.text + + def autoformat_text(self, apikey, text, formatting="auto"): + headers = { + "Authorization": "Bearer %s" % apikey, + } + + if not formatting: + formatting = "auto" + + output_formatting= "Format the following data to be a good email that can be sent to customers. Don't make it too business sounding." + if formatting != "auto": + output_formatting = formatting + + ret = requests.post( + "https://shuffler.io/api/v1/conversation", + json={ + "query": text, + "formatting": output_formatting, + "output_format": "formatting" + }, + headers=headers, + ) + + if ret.status_code != 200: + print(ret.text) + return { + "success": False, + "reason": "Status code for auto-formatter is not 200" + } + + return ret.text + + def generate_report(self, apikey, input_data, report_title, report_name="generated_report.html"): + headers = { + "Authorization": "Bearer %s" % apikey, + } + + if not report_name: + report_name = "generated_report.html" + + if "." in report_name and not ".html" in report_name: + report_name = report_name.split(".")[0] + + if not "html" in report_name: + report_name = report_name + ".html" + + report_name = report_name.replace(" ", "_", -1) + output_formatting= "Format the following text into an HTML report with relevant graphs and tables. Title of the report should be {report_title}." 
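+        # The request below posts the data to the hosted shuffler.io conversation
+        # endpoint (authenticated with the Bearer apikey above), asking for the
+        # response back as an HTML report, which is then stored as a Shuffle file
+        # via self.set_files() further down.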
+ ret = requests.post( + "https://shuffler.io/api/v1/conversation", + json={ + "query": text, + "formatting": output_formatting, + "output_format": "formatting" + }, + headers=headers, + ) + + if ret.status_code != 200: + print(ret.text) + return { + "success": False, + "reason": "Status code for auto-formatter is not 200" + } + + # Make it into a shuffle file with self.set_files() + new_file = { + "name": report_name, + "data": ret.text, + } + + retdata = self.set_files([new_file]) + if retdata["success"]: + return retdata + + return { + "success": False, + "reason": "Failed to upload file" + } + + + def extract_text_from_pdf(self, file_id): + def extract_pdf_text(pdf_path): + with open(pdf_path, 'rb') as file: + pdf_reader = PyPDF2.PdfReader(file) + text = '' + for page in pdf_reader.pages: + text += page.extract_text() + + return text + + def extract_text_from_images(images): + text = '' + for image in images: + extracted_text = pytesseract.image_to_string(image, lang='eng') + text += extracted_text + return text + + def extract_text_from_pdf_with_images(pdf_path): + images = convert_from_path(pdf_path) + return extract_text_from_images(images) + + def export_text_to_json(image_text, extracted_text): + data = { + "success": True, + 'image_text': image_text, + 'extracted_text': extracted_text, + } + + #with open(output_path, 'w+') as file: + # json.dump(data, file, indent=4) + + return data + + pdf_data = self.get_file(file_id) + defaultdata = { + "success": False, + "file_id": file_id, + "filename": pdf_data["filename"], + "reason": "Something failed in reading and parsing the pdf. See error logs for more info", + } + + # Check type of pdf_data["data"] + if not isinstance(pdf_data["data"], bytes): + self.logger.info("Encoding data to bytes for the bytestream reader") + pdf_data["data"] = pdf_data["data"].encode() + + # Make a tempfile for the file data from self.get_file + # Make a tempfile with tempfile library + with tempfile.NamedTemporaryFile() as temp: + # Write the file data to the tempfile + # Get the path to the tempfile + temp.write(pdf_data["data"]) + pdf_path = temp.name + + # Extract text from the PDF + extracted_text_from_pdf = extract_pdf_text(pdf_path) + + # Extract text from the PDF using images + extracted_text_from_images = extract_text_from_pdf_with_images(pdf_path) + + # Combine the extracted text + + # Export combined text to JSON + #output_path = pdf_path.split(".")[0] + ".json" + exported_text = export_text_to_json(extracted_text_from_images, extracted_text_from_pdf) + exported_text["file_id"] = file_id + exported_text["filename"] = pdf_data["filename"] + return exported_text + + return defaultdata + + def extract_text_from_image(self, file_id): + # Check if it's a pdf + + pdf_data = self.get_file(file_id) + if "filename" not in pdf_data: + available_fields = [] + for key, value in pdf_data.items(): + available_fields.append(key) + + return { + "success": False, + "reason": "File not found", + "details": f"Available fields: {available_fields}", + } + + # If it is, use extract_text_from_pdf + # If it's not, use pytesseract + if pdf_data["filename"].endswith(".pdf"): + return self.extract_text_from_pdf(file_id) + + defaultdata = { + "success": False, + "file_id": file_id, + "filename": pdf_data["filename"], + "reason": "Something failed in reading and parsing the pdf. 
See error logs for more info", + } + + with tempfile.NamedTemporaryFile() as temp: + # Load temp as Image + # Write the file data to the tempfile + # Get the path to the tempfile + temp.write(pdf_data["data"]) + pdf_path = temp.name + + image = Image.open(temp.name) + image = image.resize((500,300)) + custom_config = r'-l eng --oem 3 --psm 6' + text = pytesseract.image_to_string(image,config=custom_config) + + data = { + "success": True, + 'extracted_text': text, + } + + return data + + return defaultdata + + def transcribe_audio(self, file_id): + return { + "success": False, + "reason": "Not implemented yet" + } + + def find_image_objects(self, file_id): + return { + "success": False, + "reason": "Not implemented yet" + } + + def gpt(self, input_text): + return { + "success": False, + "reason": "Not implemented yet" + } + + def run_agent(self, input_data, actions=None, apps=None): + prepared_format = { + "id": self.action["id"], + "params": { + "tool_name": self.action["app_name"], + "tool_id": self.action["app_id"], + "environment": self.action["environment"], + "input": { + "text": input_data, + } + }, + } + + if actions: + prepared_format["params"]["tool_name"] = actions + + if apps: + pass + + baseurl = f"{self.url}/api/v1/agent?execution_id={self.current_execution_id}&authorization={self.authorization}&action_id={self.action['id']}" + self.logger.info("[DEBUG] Running agent action with URL '%s'" % (baseurl)) + + headers = {} + request = requests.post( + baseurl, + json=prepared_format, + headers=headers, + ) + + # Random sleep timer to force delay + time.sleep(2) + # Gets into waiting state on backend + return json.dumps({ + "app_run": True, + "input_prompt": prepared_format, + "status": request.status_code, + "body": request.text, + }) + + def run_schemaless(self, category, action, app_name="", fields=""): + self.logger.info("[DEBUG] Running schemaless action with category '%s' and action label '%s'" % (category, action)) + + # Not necessary anymore + """ + action := shuffle.CategoryAction{ + Label: step.Name, + Category: step.Category, + AppName: step.AppName, + Fields: step.Fields, + + Environment: step.Environment, + + SkipWorkflow: true, + } + """ + + data = { + "label": action, + "category": category, + + "app_name": "", + "fields": [], + + "skip_workflow": True, + } + + if app_name: + data["app_name"] = app_name + + if fields: + if isinstance(fields, list): + data["fields"] = fields + + elif isinstance(fields, dict): + for key, value in fields.items(): + parsedvalue = str(value) + try: + if str(value).startswith("{") or str(value).startswith("["): + parsedvalue = json.dumps(value) + except: + pass + + data["fields"].append({ + "key": key, + "value": parsedvalue, + }) + + else: + fields = str(fields).strip() + # Valid format: + # {"field1": "value1", "field2": "value2"} + # field1=value1&field2=value2 + # field1:value1\nfield2:value2 + + cursplit = None + if "\\n" in fields and not fields.startswith("{") and not fields.startswith("["): + cursplit = "\\n" + elif ("=" in fields or ":" in fields) and not fields.startswith("{") and not fields.startswith("["): + cursplit = "&" + + if cursplit: + newfields = [] + for line in fields.split(cursplit): + splitkey = None + if "=" in line: + splitkey = "=" + elif ":" in line: + splitkey = ":" + + if splitkey: + parts = line.split(splitkey, 1) + newfields.append({ + "key": parts[0].strip(), + "value": splitkey.join(parts[1:]).strip(), + }) + + data["fields"] = newfields + else: + if not fields.startswith("{") and not fields.startswith("["): 
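+                        # Free-form text with no key=value / key:value pairs and no
+                        # JSON markers gets wrapped into a single "data" field, so
+                        # the json.loads() call below still receives valid JSON.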
+ fields = json.dumps({ + "data": fields, + }) + + try: + loadedfields = json.loads(fields) + for key, value in loadedfields.items(): + data["fields"].append({ + "key": key, + "value": value, + }) + + except Exception as e: + self.logger.info("[ERROR] Failed to load fields as JSON: %s" % e) + return json.dumps({ + "success": False, + "reason": "Ensure 'Fields' are valid JSON", + "details": "%s" % e, + }) + + #baseurl = "%s/api/v1/apps/categories/run" % self.base_url + baseurl = "%s/api/v1/apps/categories/run" % self.url + baseurl += "?execution_id=%s&authorization=%s" % (self.current_execution_id, self.authorization) + + self.logger.info("[DEBUG] Running schemaless action with URL '%s', category %s and action label %s" % (baseurl, category, action)) + + headers = {} + request = requests.post( + baseurl, + json=data, + headers=headers, + ) + + try: + if "parameters" in self.action: + response_headers = request.headers + for key, value in response_headers.items(): + if not str(key).lower().endswith("-url"): + continue + + self.action["parameters"].append({ + "name": key, + "value": value, + }) + + #self.logger.info("[DEBUG] Response header: %s: %s" % (key, value)) + except Exception as e: + self.logger.info("[ERROR] Failed to get response headers (category action url debug mapping): %s" % e) + + try: + data = request.json() + + #if "success" in data and "result" in data and "errors" in data: + # return data["result"] + + return data + except: + return request.text + +if __name__ == "__main__": + ShuffleAI.run() diff --git a/shuffle-ai/1.1.0/upload.sh b/shuffle-ai/1.1.0/upload.sh new file mode 100755 index 00000000..b449aa4b --- /dev/null +++ b/shuffle-ai/1.1.0/upload.sh @@ -0,0 +1,16 @@ +gcloud config set project shuffler + +gcloud beta run deploy shuffle-ai-1-0-0 \ + --project=shuffler \ + --region=europe-west4 \ + --source=./ \ + --max-instances=1 \ + --concurrency=64 \ + --gpu 1 --gpu-type=nvidia-l4 \ + --cpu 4 \ + --memory=16Gi \ + --no-cpu-throttling \ + --set-env-vars=MODEL_PATH=/models/DeepSeek-R1-Distill-Llama.gguf,GPU_LAYERS=64,SHUFFLE_APP_EXPOSED_PORT=8080,SHUFFLE_SWARM_CONFIG=run,SHUFFLE_LOGS_DISABLED=true,SHUFFLE_APP_SDK_TIMEOUT=300,LD_LIBRARY_PATH=/usr/local/lib:/usr/local/nvidia/lib64:$LD_LIBRARY_PATH \ + --source=./ \ + --service-account=shuffle-apps@shuffler.iam.gserviceaccount.com \ + --timeout=120s From 924b8ff4a15fb9d5a7098f71d83a8949841fd1a0 Mon Sep 17 00:00:00 2001 From: Frikky Date: Tue, 10 Feb 2026 13:10:28 +0100 Subject: [PATCH 14/15] Fixed requirements --- shuffle-ai/1.1.0/requirements.txt | 2 +- shuffle-tools/1.2.0/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/shuffle-ai/1.1.0/requirements.txt b/shuffle-ai/1.1.0/requirements.txt index 62bee6e9..bb5d1927 100644 --- a/shuffle-ai/1.1.0/requirements.txt +++ b/shuffle-ai/1.1.0/requirements.txt @@ -1,4 +1,4 @@ -shuffle-sdk +shuffle-sdk==0.0.35 pytesseract pdf2image diff --git a/shuffle-tools/1.2.0/requirements.txt b/shuffle-tools/1.2.0/requirements.txt index 94c838b3..83301149 100644 --- a/shuffle-tools/1.2.0/requirements.txt +++ b/shuffle-tools/1.2.0/requirements.txt @@ -8,4 +8,4 @@ json2xml==5.0.5 ipaddress==1.0.23 google.auth==2.37.0 paramiko==3.5.0 -shuffle-sdk==0.0.33 +shuffle-sdk==0.0.35 From ad44e7b3fbd17778063d1c4041029e60ede87878 Mon Sep 17 00:00:00 2001 From: Frikky Date: Wed, 11 Feb 2026 15:18:15 +0100 Subject: [PATCH 15/15] Added time library to shuffle-ai --- shuffle-ai/1.1.0/src/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shuffle-ai/1.1.0/src/app.py 
b/shuffle-ai/1.1.0/src/app.py index e5f5e9bc..8785b341 100644 --- a/shuffle-ai/1.1.0/src/app.py +++ b/shuffle-ai/1.1.0/src/app.py @@ -2,6 +2,7 @@ import json import tempfile import requests +import time try: import pytesseract
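For context on the local-LLM path re-added in the patches above, a minimal standalone sketch of the llama-cpp-python call pattern that the run_llm action wraps. The model path and GPU layer count here are illustrative only; the app itself reads them from the MODEL_PATH and GPU_LAYERS environment variables.

    import llama_cpp

    # Load a GGUF model; n_gpu_layers > 0 offloads that many layers to the GPU.
    llm = llama_cpp.Llama(
        model_path="/models/DeepSeek-R1-Distill-Llama.gguf",
        n_gpu_layers=8,
    )

    # Same chat-completion shape the app uses: one system and one user message.
    output = llm.create_chat_completion(
        max_tokens=100,
        messages=[
            {"role": "system", "content": "Answer the question directly."},
            {"role": "user", "content": "Summarize what Shuffle does."},
        ],
    )

    # The generated text is returned under choices[0].message.content.
    print(output["choices"][0]["message"]["content"])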