Changeset 61467
- Timestamp:
- 01/10/2026 09:45:09 PM (3 days ago)
- Location:
- trunk
- Files:
-
- 1 added
- 2 edited
-
src/wp-includes/kses.php (modified) (1 diff)
-
tests/phpunit/tests/kses/wpKsesHair.php (added)
-
tests/phpunit/tests/media.php (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/kses.php
r61436 r61467 1586 1586 1587 1587 /** 1588 * Builds an attribute list from string containing attributes. 1589 * 1590 * This function does a lot of work. It parses an attribute list into an array 1591 * with attribute data, and tries to do the right thing even if it gets weird 1592 * input. It will add quotes around attribute values that don't have any quotes 1593 * or apostrophes around them, to make it easier to produce HTML code that will 1594 * conform to W3C's HTML specification. It will also remove bad URL protocols 1595 * from attribute values. It also reduces duplicate attributes by using the 1596 * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`). 1588 * Given a string of HTML attributes and values, parse into a structured attribute list. 1589 * 1590 * This function performs a number of transformations while parsing attribute strings: 1591 * - It normalizes attribute values and surrounds them with double quotes. 1592 * - It normalizes HTML character references inside attribute values. 1593 * - It removes “bad” URL protocols from attribute values. 1594 * 1595 * Otherwise this reads the attributes as if they were part of an HTML tag. It performs 1596 * these transformations to lower the risk of mis-parsing down the line and to perform 1597 * URL sanitization in line with the rest of the `kses` subsystem. Importantly, it does 1598 * not decode the attribute values, meaning that special HTML syntax characters will 1599 * be left with character references in the `value` property. 
1600 * 1601 * Example: 1602 * 1603 * $attrs = wp_kses_hair( 'class="is-wide" inert data-lazy=\'<img>\' =/🐮=/' ); 1604 * $attrs === array( 1605 * 'class' => array( 'name' => 'class', 'value' => 'is-wide', 'whole' => 'class="is-wide"', 'vless' => 'n' ), 1606 * 'inert' => array( 'name' => 'inert', 'value' => '', 'whole' => 'inert', 'vless' => 'y' ), 1607 * 'data-lazy' => array( 'name' => 'data-lazy', 'value' => '&lt;img&gt;', 'whole' => 'data-lazy="&lt;img&gt;"', 'vless' => 'n' ), 1608 * '=' => array( 'name' => '=', 'value' => '', 'whole' => '=', 'vless' => 'y' ), 1609 * '🐮' => array( 'name' => '🐮', 'value' => '/', 'whole' => '🐮="/"', 'vless' => 'n' ), 1610 * ); 1597 1611 * 1598 1612 * @since 1.0.0 1613 * @since 7.0.0 Reliably parses HTML via the HTML API. 1599 1614 * 1600 1615 * @param string $attr Attribute list from HTML element to closing HTML element tag. 1601 1616 * @param string[] $allowed_protocols Array of allowed URL protocols. 1602 * @return array[] Array of attribute information after parsing. 1617 * @return array<string, array{name: string, value: string, whole: string, vless: 'y'|'n'}> Array of attribute information after parsing. 1603 1618 */ 1604 1619 function wp_kses_hair( $attr, $allowed_protocols ) { 1605 $attrarr = array(); 1606 $mode = 0; 1607 $attrname = ''; 1608 $uris = wp_kses_uri_attributes(); 1609 1610 // Loop through the whole attribute list. 1611 1612 while ( strlen( $attr ) !== 0 ) { 1613 $working = 0; // Was the last operation successful? 1614 1615 switch ( $mode ) { 1616 case 0: 1617 if ( preg_match( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/', $attr, $match ) ) { 1618 $attrname = $match[1]; 1619 $working = 1; 1620 $mode = 1; 1621 $attr = preg_replace( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/', '', $attr ); 1622 } 1623 1624 break; 1625 1626 case 1: 1627 if ( preg_match( '/^\s*=\s*/', $attr ) ) { // Equals sign. 
1628 $working = 1; 1629 $mode = 2; 1630 $attr = preg_replace( '/^\s*=\s*/', '', $attr ); 1631 break; 1632 } 1633 1634 if ( preg_match( '/^\s+/', $attr ) ) { // Valueless. 1635 $working = 1; 1636 $mode = 0; 1637 1638 if ( false === array_key_exists( $attrname, $attrarr ) ) { 1639 $attrarr[ $attrname ] = array( 1640 'name' => $attrname, 1641 'value' => '', 1642 'whole' => $attrname, 1643 'vless' => 'y', 1644 ); 1645 } 1646 1647 $attr = preg_replace( '/^\s+/', '', $attr ); 1648 } 1649 1650 break; 1651 1652 case 2: 1653 if ( preg_match( '%^"([^"]*)"(\s+|/?$)%', $attr, $match ) ) { 1654 // "value" 1655 $thisval = $match[1]; 1656 if ( in_array( strtolower( $attrname ), $uris, true ) ) { 1657 $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); 1658 } 1659 1660 if ( false === array_key_exists( $attrname, $attrarr ) ) { 1661 $attrarr[ $attrname ] = array( 1662 'name' => $attrname, 1663 'value' => $thisval, 1664 'whole' => "$attrname=\"$thisval\"", 1665 'vless' => 'n', 1666 ); 1667 } 1668 1669 $working = 1; 1670 $mode = 0; 1671 $attr = preg_replace( '/^"[^"]*"(\s+|$)/', '', $attr ); 1672 break; 1673 } 1674 1675 if ( preg_match( "%^'([^']*)'(\s+|/?$)%", $attr, $match ) ) { 1676 // 'value' 1677 $thisval = $match[1]; 1678 if ( in_array( strtolower( $attrname ), $uris, true ) ) { 1679 $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); 1680 } 1681 1682 if ( false === array_key_exists( $attrname, $attrarr ) ) { 1683 $attrarr[ $attrname ] = array( 1684 'name' => $attrname, 1685 'value' => $thisval, 1686 'whole' => "$attrname='$thisval'", 1687 'vless' => 'n', 1688 ); 1689 } 1690 1691 $working = 1; 1692 $mode = 0; 1693 $attr = preg_replace( "/^'[^']*'(\s+|$)/", '', $attr ); 1694 break; 1695 } 1696 1697 if ( preg_match( "%^([^\s\"']+)(\s+|/?$)%", $attr, $match ) ) { 1698 // value 1699 $thisval = $match[1]; 1700 if ( in_array( strtolower( $attrname ), $uris, true ) ) { 1701 $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); 1702 } 1703 1704 if ( 
false === array_key_exists( $attrname, $attrarr ) ) { 1705 $attrarr[ $attrname ] = array( 1706 'name' => $attrname, 1707 'value' => $thisval, 1708 'whole' => "$attrname=\"$thisval\"", 1709 'vless' => 'n', 1710 ); 1711 } 1712 1713 // We add quotes to conform to W3C's HTML spec. 1714 $working = 1; 1715 $mode = 0; 1716 $attr = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr ); 1717 } 1718 1719 break; 1720 } // End switch. 1721 1722 if ( 0 === $working ) { // Not well-formed, remove and try again. 1723 $attr = wp_kses_html_error( $attr ); 1724 $mode = 0; 1620 $attributes = array(); 1621 $uris = wp_kses_uri_attributes(); 1622 1623 $processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" ); 1624 $processor->next_token(); 1625 1626 $syntax_characters = array( 1627 '&' => '&amp;', 1628 '<' => '&lt;', 1629 '>' => '&gt;', 1630 "'" => '&apos;', 1631 '"' => '&quot;', 1632 ); 1633 1634 foreach ( $processor->get_attribute_names_with_prefix( '' ) as $name ) { 1635 $value = $processor->get_attribute( $name ); 1636 $is_bool = true === $value; 1637 if ( is_string( $value ) && in_array( $name, $uris, true ) ) { 1638 $value = wp_kses_bad_protocol( $value, $allowed_protocols ); 1725 1639 } 1726 } // End while. 1727 1728 if ( 1 === $mode && false === array_key_exists( $attrname, $attrarr ) ) { 1729 /* 1730 * Special case, for when the attribute list ends with a valueless 1731 * attribute like "selected". 1732 */ 1733 $attrarr[ $attrname ] = array( 1734 'name' => $attrname, 1735 'value' => '', 1736 'whole' => $attrname, 1737 'vless' => 'y', 1640 1641 // Reconstruct and normalize the attribute value. 1642 $recoded = $is_bool ? '' : strtr( $value, $syntax_characters ); 1643 $whole = $is_bool ? $name : "{$name}=\"{$recoded}\""; 1644 1645 $attributes[ $name ] = array( 1646 'name' => $name, 1647 'value' => $recoded, 1648 'whole' => $whole, 1649 'vless' => $is_bool ? 'y' : 'n', 1738 1650 ); 1739 1651 } 1740 1652 1741 return $attrarr; 1653 return $attributes; 1742 1654 } 1743 1655 -
trunk/tests/phpunit/tests/media.php
r61416 r61467 228 228 self::HTML_CONTENT, 229 229 $mark, 230 'Test caption content should not contain the mark surround it: check test setup.' 230 'Test caption content should not contain the mark surrounding it: check test setup.' 231 231 ); 232 232
Note: See TracChangeset
for help on using the changeset viewer.