Context Navigation

← Previous Change
Next Change →

class-hashtag.php

Timestamp:

01/13/2026 07:27:36 AM (25 hours ago)

Author:

pfefferle

Message:

Update to version 7.8.3 from GitHub

File:

: 1 edited

activitypub/trunk/includes/class-hashtag.php (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

activitypub/trunk/includes/class-hashtag.php

-                      r3352030
+                      r3438374
      */
     public static function init() {
         if ( '1' === \get_option( 'activitypub_use_hashtags', '1' ) ) {
+        if ( '1' === \get_option( 'activitypub_use_hashtags', '0' ) ) {
             \add_action( 'wp_insert_post', array( self::class, 'insert_post' ), 10, 2 );
             \add_filter( 'the_content', array( self::class, 'the_content' ) );
 …
+        }
+        $content = $post->post_content . "\n" . $post->post_excerpt;
+        $content = self::extract_text_outside_protected_tags( $content );
         $tags = array();
-        // Skip hashtags in HTML attributes, like hex colors.
-        $content = wp_strip_all_tags( $post->post_content . "\n" . $post->post_excerpt );
         if ( \preg_match_all( '/' . ACTIVITYPUB_HASHTAGS_REGEXP . '/i', $content, $match ) ) {
             $tags = array_unique( $match[1] );
+            $tags = \array_unique( $match[1] );
+        }
         \wp_add_post_tags( $post->ID, \implode( ', ', $tags ) );
+    }
+    /**
+     * Extract text content from outside protected HTML elements.
+     *
+     * Uses WP_HTML_Tag_Processor to walk the markup token by token and keeps
+     * only the #text tokens that are not nested inside a protected element
+     * (PRE, CODE or A). All other text chunks are concatenated in document
+     * order without any separator.
+     *
+     * Open protected elements are tracked on a stack. A closing tag unwinds the
+     * stack down to the most recently opened element of the same name, so
+     * nested elements of the same type (e.g. CODE inside CODE) stay protected
+     * until the outermost one is closed.
+     *
+     * Modelled on enrich_content_data().
+     * NOTE(review): that method is not visible here; confirm both handle nested
+     * same-name tags the same way.
+     *
+     * @param string $content The HTML content to process.
+     *
+     * @return string Text content from non-protected areas only.
+     */
+    private static function extract_text_outside_protected_tags( $content ) {
+        $processor = new \WP_HTML_Tag_Processor( $content );
+        /*
+         * Do not process content inside protected tags.
+         *
+         * Note: SCRIPT, STYLE, and TEXTAREA are "atomic" elements in
+         * WP_HTML_Tag_Processor, meaning their content is bundled with the tag
+         * token and won't appear as separate #text nodes. Because of this they
+         * do not need to be listed in $protected_tags: their inner text is
+         * never surfaced as #text tokens for us to process.
+         * See https://github.com/WordPress/wordpress-develop/blob/0fb3bb29596918864d808d156268a2df63c83620/src/wp-includes/html-api/class-wp-html-tag-processor.php#L276
+         */
+        $protected_tags   = array( 'PRE', 'CODE', 'A' );
+        $tag_stack        = array();
+        $filtered_content = '';
+        while ( $processor->next_token() ) {
+            $token_type = $processor->get_token_type();
+            if ( '#tag' === $token_type ) {
+                $tag_name = $processor->get_tag();
+                if ( $processor->is_tag_closer() ) {
+                    /*
+                     * Closing tag: unwind the stack to the innermost matching
+                     * opener. Searching from the start of the stack would pick
+                     * the outermost entry and drop protection too early when
+                     * elements of the same name are nested.
+                     */
+                    $open_positions = \array_keys( $tag_stack, $tag_name, true );
+                    if ( ! empty( $open_positions ) ) {
+                        // Also discards any protected tags opened after it that were never closed.
+                        $tag_stack = \array_slice( $tag_stack, 0, \end( $open_positions ) );
+                    }
+                } elseif ( \in_array( $tag_name, $protected_tags, true ) ) {
+                    // Opening tag: add to stack.
+                    $tag_stack[] = $tag_name;
+                }
+            } elseif ( '#text' === $token_type && empty( $tag_stack ) ) {
+                // Only include text chunks that are outside protected tags.
+                $filtered_content .= $processor->get_modifiable_text();
+            }
+        }
+        return $filtered_content;
+    }

Note: See TracChangeset for help on using the changeset viewer.

Trac UI Preferences

Plugin Directory

Context Navigation

Changeset 3438374 for activitypub/trunk/includes/class-hashtag.php

Legend:

activitypub/trunk/includes/class-hashtag.php

Download in other formats: