Changeset 3438374 for activitypub/trunk/includes/class-hashtag.php
- Timestamp: 01/13/2026 07:27:36 AM (25 hours ago)
- File: 1 edited
activitypub/trunk/includes/class-hashtag.php (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
activitypub/trunk/includes/class-hashtag.php
r3352030 r3438374 18 18 */ 19 19 public static function init() { 20 if ( '1' === \get_option( 'activitypub_use_hashtags', ' 1' ) ) {20 if ( '1' === \get_option( 'activitypub_use_hashtags', '0' ) ) { 21 21 \add_action( 'wp_insert_post', array( self::class, 'insert_post' ), 10, 2 ); 22 22 \add_filter( 'the_content', array( self::class, 'the_content' ) ); … … 62 62 } 63 63 64 $content = $post->post_content . "\n" . $post->post_excerpt; 65 $content = self::extract_text_outside_protected_tags( $content ); 66 64 67 $tags = array(); 65 66 // Skip hashtags in HTML attributes, like hex colors.67 $content = wp_strip_all_tags( $post->post_content . "\n" . $post->post_excerpt );68 69 68 if ( \preg_match_all( '/' . ACTIVITYPUB_HASHTAGS_REGEXP . '/i', $content, $match ) ) { 70 $tags = array_unique( $match[1] );69 $tags = \array_unique( $match[1] ); 71 70 } 72 71 73 72 \wp_add_post_tags( $post->ID, \implode( ', ', $tags ) ); 73 } 74 75 /** 76 * Extract text content from outside protected HTML elements. 77 * 78 * Uses WP_HTML_Tag_Processor to properly parse HTML and skip content inside 79 * protected tags, matching the behavior of enrich_content_data(). 80 * 81 * @param string $content The HTML content to process. 82 * 83 * @return string Text content from non-protected areas only. 84 */ 85 private static function extract_text_outside_protected_tags( $content ) { 86 $processor = new \WP_HTML_Tag_Processor( $content ); 87 88 /* 89 * Do not process content inside protected tags. 90 * 91 * Note: SCRIPT, STYLE, and TEXTAREA are "atomic" elements in 92 * WP_HTML_Tag_Processor, meaning their content is bundled with the tag 93 * token and won't appear as separate #text nodes. Because of this they 94 * do not need to be listed in $protected_tags: their inner text is 95 * never surfaced as #text tokens for us to process. 
96 * See https://github.com/WordPress/wordpress-develop/blob/0fb3bb29596918864d808d156268a2df63c83620/src/wp-includes/html-api/class-wp-html-tag-processor.php#L276 97 */ 98 $protected_tags = array( 'PRE', 'CODE', 'A' ); 99 $tag_stack = array(); 100 $filtered_content = ''; 101 102 while ( $processor->next_token() ) { 103 $token_type = $processor->get_token_type(); 104 105 if ( '#tag' === $token_type ) { 106 $tag_name = $processor->get_tag(); 107 108 if ( $processor->is_tag_closer() ) { 109 // Closing tag: remove from stack. 110 $i = \array_search( $tag_name, $tag_stack, true ); 111 if ( false !== $i ) { 112 $tag_stack = \array_slice( $tag_stack, 0, $i ); 113 } 114 } elseif ( \in_array( $tag_name, $protected_tags, true ) ) { 115 // Opening tag: add to stack. 116 $tag_stack[] = $tag_name; 117 } 118 } elseif ( '#text' === $token_type && empty( $tag_stack ) ) { 119 // Only include text chunks that are outside protected tags. 120 $filtered_content .= $processor->get_modifiable_text(); 121 } 122 } 123 124 return $filtered_content; 74 125 } 75 126
Note: See TracChangeset for help on using the changeset viewer.