diff --git a/.gitattributes b/.gitattributes index 6e5411e..7f5d248 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,10 @@ -.gitignore export-ignore -.gitattributes export-ignore -/Examples export-ignore \ No newline at end of file +# Exclude from Composer dist tarballs. +/.github export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore +/.php-cs-fixer.dist.php export-ignore +/.phpunit.cache export-ignore +/docs export-ignore +/phpstan.neon.dist export-ignore +/phpunit.xml.dist export-ignore +/tests export-ignore diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..fefbebf --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,139 @@ +name: CI + +on: + push: + branches: [main, "*.x"] + pull_request: + branches: [main, "*.x"] + +jobs: + validate: + name: composer validate + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + coverage: none + tools: composer:v2 + - run: composer validate --strict + + cs: + name: PHP-CS-Fixer + runs-on: ubuntu-latest + needs: validate + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + extensions: ctype, mbstring, iconv + coverage: none + tools: composer:v2 + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + - name: Check coding standards + run: composer cs-check + + stan: + name: PHPStan + runs-on: ubuntu-latest + needs: validate + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + extensions: ctype, mbstring, iconv + coverage: none + tools: composer:v2 + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + - name: Run PHPStan + run: composer stan + + tests: + name: PHPUnit (PHP ${{ matrix.php }}, ${{ matrix.deps }}) + runs-on: ubuntu-latest + needs: validate + strategy: + fail-fast: 
false + matrix: + php: ["7.4", "8.0", "8.1", "8.2", "8.3", "8.4"] + deps: ["highest"] + include: + - php: "7.4" + deps: "lowest" + - php: "8.4" + deps: "lowest" + steps: + - uses: actions/checkout@v4 + + - name: Set up PHP ${{ matrix.php }} + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + extensions: ctype, mbstring, iconv + coverage: none + tools: composer:v2 + + - name: Validate composer.json + run: composer validate --no-check-publish + + - name: Get composer cache directory + id: composer-cache + run: echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT" + + - name: Cache composer dependencies + uses: actions/cache@v4 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: composer-${{ matrix.php }}-${{ matrix.deps }}-${{ hashFiles('**/composer.json') }} + restore-keys: composer-${{ matrix.php }}-${{ matrix.deps }}- + + - name: Install highest dependencies + if: matrix.deps == 'highest' + run: composer update --prefer-dist --no-progress --no-interaction + + - name: Install lowest dependencies + if: matrix.deps == 'lowest' + run: composer update --prefer-dist --no-progress --no-interaction --prefer-lowest --prefer-stable + + - name: Run PHPUnit + run: vendor/bin/phpunit + + coverage: + name: Coverage + runs-on: ubuntu-latest + needs: tests + steps: + - uses: actions/checkout@v4 + + - name: Set up PHP + uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + extensions: ctype, mbstring, iconv + coverage: pcov + tools: composer:v2 + + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + + - name: Run PHPUnit with coverage + run: vendor/bin/phpunit --coverage-clover=coverage.xml + + - name: Upload coverage artifact + uses: actions/upload-artifact@v4 + with: + name: coverage-clover + path: coverage.xml + retention-days: 14 + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + files: ./coverage.xml + flags: phpunit + fail_ci_if_error: 
false diff --git a/.gitignore b/.gitignore index 0abe7ad..8bb648c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,8 @@ /.vscode/ /.vs/ /vendor/ -/composer.lock \ No newline at end of file +/composer.lock +/build/ +/.phpunit.cache/ +/.phpunit.result.cache +/.php-cs-fixer.cache diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php new file mode 100644 index 0000000..7deb8da --- /dev/null +++ b/.php-cs-fixer.dist.php @@ -0,0 +1,32 @@ +in([__DIR__ . '/src', __DIR__ . '/tests']) + ->name('*.php'); + +return (new PhpCsFixer\Config()) + ->setRiskyAllowed(true) + ->setRules([ + '@PSR12' => true, + '@PSR12:risky' => true, + '@PHP74Migration' => true, + '@PHP74Migration:risky' => true, + 'array_syntax' => ['syntax' => 'short'], + 'declare_strict_types' => true, + 'native_function_invocation' => [ + 'include' => ['@compiler_optimized'], + 'scope' => 'namespaced', + 'strict' => true, + ], + 'no_unused_imports' => true, + 'ordered_imports' => [ + 'imports_order' => ['class', 'function', 'const'], + 'sort_algorithm' => 'alpha', + ], + 'single_quote' => true, + 'trailing_comma_in_multiline' => ['elements' => ['arrays']], + ]) + ->setFinder($finder) + ->setCacheFile(__DIR__ . '/build/php-cs-fixer.cache'); diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3ad7245 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,85 @@ +# Changelog + +All notable changes to `initphp/escaper` are documented here. + +The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [2.0.0] + +A reliability- and correctness-focused major release. Several bug fixes +in this release are visible from the outside, so the version bump is +necessary even though the public surface is unchanged. See +[`UPGRADE-2.0.md`](./UPGRADE-2.0.md) for a step-by-step migration guide. 
+ +### Added + +- Dedicated exception hierarchy under `InitPHP\Escaper\Exception\`: + `EscaperException` (base, extends `\RuntimeException`), + `EncodingNotSupportedException`, `EncodingConversionException`, + `InvalidContextException`, `InvalidUtf8Exception`. +- Full PHPUnit test suite under `tests/` (62 tests, 100 assertions, + ~91% line coverage). +- GitHub Actions CI: `composer validate`, PHP-CS-Fixer, PHPStan (max), + PHPUnit on PHP 7.4 – 8.4, coverage upload. +- PHPStan configuration at the `max` level with zero reported issues. +- PHP-CS-Fixer configuration based on `@PSR12` and + `@PHP74Migration` rule sets. +- Developer documentation under `docs/` covering each escape context, + encoding handling, exceptions and security notes. +- `Esc::reset()` helper to clear the memoised `Escaper` cache (used by + tests; useful when the calling code wants to drop cached instances). +- `composer.json` scripts: `test`, `test-coverage`, `stan`, `cs-check`, + `cs-fix`, `ci`. + +### Changed + +- **`Esc::esc()` recursion** now propagates `$encoding` into recursive + calls. Previously the encoding was dropped on every inner call, + silently defaulting to UTF-8 for nested arrays. +- **`Esc::esc()` instance cache** is now keyed by encoding. The previous + cache compared `$escaper->getEncoding()` (`'utf-8'`) against the raw + `$encoding` argument (often `null`), so the cache rebuilt on every + default call. +- **`Escaper` constructor** raises `EncodingNotSupportedException` + instead of `\Exception`. (Still catchable via `\Exception` / + `\RuntimeException`.) +- **`HTML attribute` matcher** evaluates the C0/C1 control-character + check against the decoded code point, so `U+0080`–`U+009F` are now + correctly replaced with `U+FFFD` when they arrive in multibyte UTF-8 + form. Previously only single-byte controls were caught. +- **`composer.json`** now requires `ext-mbstring`. `ext-iconv` remains + optional and is preferred when present (`suggest` entry added). 
+- **PHPDoc blocks** rewritten across the package to reflect the actual + code behaviour. + +### Fixed + +- **Silent data loss on encoding-conversion failure.** When `iconv` / + `mb_convert_encoding` returned `false`, `Escaper::convertEncoding()` + previously returned an empty string and let it propagate, masking + real failures. It now raises `EncodingConversionException`. +- **`isUtf8()`** uses explicit `=== 1` comparison against + `preg_match()` instead of relying on PHP's loose type coercion in a + `bool` return. +- **Misleading error message** in `convertEncoding()`: the "MB_String + plugin is required" text appeared even when iconv was tried first. + Replaced with "Either ext-iconv or ext-mbstring is required". +- **Unused callable properties** (`$htmlAttrMatcher`, `$jsMatcher`, + `$cssMatcher`) removed. The matchers are now passed inline to + `preg_replace_callback`. + +### Removed + +- **`Examples/`** directory removed. The same scenarios are documented + under [`docs/`](./docs) with verified output for each example. + +## [1.0] + +Initial release. + +[Unreleased]: https://github.com/InitPHP/Escaper/compare/2.0.0...HEAD +[2.0.0]: https://github.com/InitPHP/Escaper/compare/1.0...2.0.0 +[1.0]: https://github.com/InitPHP/Escaper/releases/tag/1.0 diff --git a/Examples/Attr.php b/Examples/Attr.php deleted file mode 100644 index 1d5e62d..0000000 --- a/Examples/Attr.php +++ /dev/null @@ -1,23 +0,0 @@ - - - - - Quoteless Attribute - - - -
- - ?> - > - Hello World - -
- - \ No newline at end of file diff --git a/Examples/Css.php b/Examples/Css.php deleted file mode 100644 index bf5443e..0000000 --- a/Examples/Css.php +++ /dev/null @@ -1,28 +0,0 @@ -'); -} -INPUT; -?> - - - - Escaped CSS - - - - -

User controlled CSS needs to be properly escaped!

- - \ No newline at end of file diff --git a/Examples/Html.php b/Examples/Html.php deleted file mode 100644 index be8bad7..0000000 --- a/Examples/Html.php +++ /dev/null @@ -1,20 +0,0 @@ - - - - - Encodings set correctly! - - - -alert("initphp")'; - -// <script>alert("initphp")</script> -echo Esc::esc($input, 'html'); - -?> - \ No newline at end of file diff --git a/Examples/Js.php b/Examples/Js.php deleted file mode 100644 index 6156d03..0000000 --- a/Examples/Js.php +++ /dev/null @@ -1,24 +0,0 @@ - - - - - Escaped Entities - - - - -

Hello World

- - \ No newline at end of file diff --git a/Examples/Url.php b/Examples/Url.php deleted file mode 100644 index e8654a2..0000000 --- a/Examples/Url.php +++ /dev/null @@ -1,21 +0,0 @@ - - - - - Unescaped URL data - - - - -Click here! - - \ No newline at end of file diff --git a/README.md b/README.md index 5c8e332..6133c91 100644 --- a/README.md +++ b/README.md @@ -1,180 +1,156 @@ -# InitPHP Escaper +# initphp/escaper -Securely and safely escape HTML, HTML attributes, JavaScript, CSS, and URLs. +Context-aware output escaper for PHP. Safely render untrusted user input inside +HTML, HTML attributes, JavaScript, CSS and URLs. -[![Latest Stable Version](http://poser.pugx.org/initphp/escaper/v)](https://packagist.org/packages/initphp/escaper) [![Total Downloads](http://poser.pugx.org/initphp/escaper/downloads)](https://packagist.org/packages/initphp/escaper) [![Latest Unstable Version](http://poser.pugx.org/initphp/escaper/v/unstable)](https://packagist.org/packages/initphp/escaper) [![License](http://poser.pugx.org/initphp/escaper/license)](https://packagist.org/packages/initphp/escaper) [![PHP Version Require](http://poser.pugx.org/initphp/escaper/require/php)](https://packagist.org/packages/initphp/escaper) +[![Latest Stable Version](https://poser.pugx.org/initphp/escaper/v)](https://packagist.org/packages/initphp/escaper) +[![PHP Version Require](https://poser.pugx.org/initphp/escaper/require/php)](https://packagist.org/packages/initphp/escaper) +[![CI](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml/badge.svg)](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml) +[![codecov](https://codecov.io/gh/InitPHP/Escaper/branch/main/graph/badge.svg)](https://codecov.io/gh/InitPHP/Escaper) +[![License](https://poser.pugx.org/initphp/escaper/license)](https://packagist.org/packages/initphp/escaper) +[![Total Downloads](https://poser.pugx.org/initphp/escaper/downloads)](https://packagist.org/packages/initphp/escaper) -## Requirements +`htmlspecialchars()` is 
not enough on its own. Each output context — an HTML +body, an attribute, a JavaScript string literal, a CSS value, a URL parameter +— needs its own escaping rules, and using the wrong one can leave you exposed +to XSS even when you *think* you have escaped your data. -- PHP 7.4 or higher -- PHP _CType_ Extension -- PHP _MB_String_ or _Iconv_ Extension +`initphp/escaper` implements the rules from the +[OWASP XSS Prevention Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html) +for the five most common contexts, behind a small, dependency-free API. ## Installation -```php +```bash composer require initphp/escaper ``` -## Usage +### Requirements -`\InitPHP\Escaper\Esc::esc()` : +- PHP 7.4 or newer +- `ext-ctype` +- `ext-mbstring` (required); `ext-iconv` is used when present and preferred + over mbstring -```php -public static function esc(string[]|string $data, string $context = 'html', ?string $encoding = null): array|string; -``` +## Quick start + +```php +use InitPHP\Escaper\Esc; + +echo Esc::esc('<script>alert(1)</script>'); +// &lt;script&gt;alert(1)&lt;/script&gt; + +echo Esc::esc('faketitle onmouseover=alert(1);', 'attr'); +// faketitle&#x20;onmouseover&#x3D;alert&#x28;1&#x29;&#x3B; + +echo Esc::esc('"; alert(1); var x="', 'js'); +// \x22\x3B\x20alert\x281\x29\x3B\x20var\x20x\x3D\x22 -- `$data` : The content to be cleared. -- `$context` : The method to be used for cleaning. If the value is not one of the following; Throws `Exception`. - - `html` - - `js` - - `css` - - `url` - - `attr` -- `$encoding` : If the character set to be used is not specified or `NULL`; `UTF-8` is used by default. - -`html` Escaper Example : -```php -alert("initphp")'; -?> - - - - Encodings set correctly! 
- - - - - - +echo Esc::esc('</style><script>alert(1)</script>', 'css'); +// \3C \2F style\3E \3C script\3E alert\28 1\29 \3C \2F script\3E + +echo Esc::esc('" onmouseover="alert(1)', 'url'); +// %22%20onmouseover%3D%22alert%281%29 ``` -`attr` Escaper Example : +`Esc::esc()` also accepts arrays and recurses into them, so escaping a whole +request payload at the view boundary is a one-liner: ```php - - - - - Quoteless Attribute - - - -
- - ?> - > - Hello World - -
- - +$safe = Esc::esc($_GET, 'html'); ``` -`Js` Escaper Example : +## API -```php - - - - - Escaped Entities - - - - -

Hello World

- - +```php +public static function esc( + array|string $data, + string $context = 'html', + ?string $encoding = null +): array|string; ``` -`css` Escaper Example : +| Argument | Description | +| ----------- | --------------------------------------------------------------------------- | +| `$data` | A string, or an array (which is escaped recursively). | +| `$context` | `html`, `attr`, `js`, `css`, `url`, or `raw` (returns input unchanged). | +| `$encoding` | Output encoding. `null` resolves to UTF-8. See [Encodings](docs/encodings.md). | + +Throws `InitPHP\Escaper\Exception\InvalidContextException` for unknown contexts. + +### `Escaper` + +For lower-level use, instantiate `Escaper` directly. Each instance is bound to +one encoding and exposes one method per context: ```php -'); -} -INPUT; -?> - - - - Escaped CSS - - - - -

User controlled CSS needs to be properly escaped!

- - +use InitPHP\Escaper\Escaper; + +$escaper = new Escaper(); // utf-8 +$escaper = new Escaper('windows-1252'); + +$escaper->escHtml($string); +$escaper->escHtmlAttr($string); +$escaper->escJs($string); +$escaper->escCss($string); +$escaper->escUrl($string); ``` -`url` Escaper Example : +## Documentation -```php - - - - - Unescaped URL - - - - -Click - - +The [`docs/`](docs/) directory contains a per-context walkthrough with +examples, do-and-don't guidance and security notes: + +- [Getting started](docs/getting-started.md) +- [HTML body context](docs/context-html.md) +- [HTML attribute context](docs/context-html-attribute.md) +- [JavaScript context](docs/context-javascript.md) +- [CSS context](docs/context-css.md) +- [URL context](docs/context-url.md) +- [Encodings](docs/encodings.md) +- [Exceptions](docs/exceptions.md) +- [Security notes](docs/security-notes.md) + +## A word of warning + +> Output escaping prevents XSS but it is not a substitute for input validation, +> authentication, or authorisation. It is also context-sensitive: the +> JavaScript escaper assumes the caller wraps the result in quotes, the HTML +> attribute escaper assumes the value is used as a single attribute value, and +> so on. Read the per-context docs before mixing contexts. + +## Contributing + +Contributions are welcome. Please read the +[org-wide CONTRIBUTING guide](https://github.com/InitPHP/.github/blob/main/CONTRIBUTING.md) +for the workflow, coding standards and test expectations. 
+ +A typical loop is: + +```bash +git clone https://github.com/InitPHP/Escaper.git +cd Escaper +composer install +composer ci # cs-check + phpstan + phpunit ``` -## Credits +Individual steps are also available: + +| Command | What it does | +| ------------------ | ------------------------------------------- | +| `composer test` | Run PHPUnit | +| `composer stan` | Run PHPStan (max level) | +| `composer cs-check`| Report PHP-CS-Fixer violations, no changes | +| `composer cs-fix` | Apply PHP-CS-Fixer changes | + +## Security -- [Muhammet ŞAFAK](https://www.muhammetsafak.com.tr) <> +If you discover a security issue, please follow the disclosure process +documented in [SECURITY.md](https://github.com/InitPHP/.github/blob/main/SECURITY.md) +rather than opening a public issue. ## License -Copyright © 2022 [MIT License](./LICENSE) +Released under the [MIT License](./LICENSE). © InitPHP. diff --git a/UPGRADE-2.0.md b/UPGRADE-2.0.md new file mode 100644 index 0000000..891a1ed --- /dev/null +++ b/UPGRADE-2.0.md @@ -0,0 +1,140 @@ +# Upgrading from 1.x to 2.0 + +`initphp/escaper` 2.0 is a correctness release. The public API surface +is unchanged — every 1.x method still exists with the same signature. +What changed is **how the escaper signals failure** and **what happens +in a few edge cases that were latent bugs in 1.x**. + +If your 1.x code only calls `Esc::esc()` or `Escaper::escHtml()` etc. +in the happy path, you should be able to upgrade without code changes. +The notes below cover the cases where you may need to act. + +## 1. New `composer require`: `ext-mbstring` + +`composer.json` now declares `ext-mbstring` as a hard requirement +(`ext-iconv` remains optional but is preferred when present). If your +production image does not bundle mbstring you must add it: + +```Dockerfile +RUN docker-php-ext-install mbstring +``` + +Or on a Debian/Ubuntu host: + +```bash +apt-get install -y php-mbstring +``` + +## 2. 
Replace `catch (\Exception $e)` blocks (recommended) + +1.x threw a plain `\Exception`. 2.x ships a dedicated exception tree. +Your existing `\Exception` (or `\Throwable`) catches still work because +the new exceptions extend `\RuntimeException`, but you can now be +specific: + +```diff + use InitPHP\Escaper\Esc; ++use InitPHP\Escaper\Exception\EscaperException; + + try { + echo Esc::esc($value, 'attr'); +-} catch (\Exception $e) { ++} catch (EscaperException $e) { + // … + } +``` + +The full tree: + +``` +\RuntimeException + └─ InitPHP\Escaper\Exception\EscaperException + ├─ EncodingNotSupportedException // unsupported encoding constructor arg + ├─ EncodingConversionException // iconv/mbstring failure (NEW behaviour, see §3) + ├─ InvalidContextException // unknown context passed to Esc::esc() + └─ InvalidUtf8Exception // input is not / cannot be UTF-8 +``` + +## 3. Encoding-conversion failure now throws (behavioural break) + +In 1.x, if `iconv` / `mb_convert_encoding` returned `false`, the +escaper silently substituted an empty string and returned it. That +silently destroyed data. 2.x raises `EncodingConversionException` +instead. + +If you rely on the old "empty string on failure" behaviour, add an +explicit `try`/`catch`: + +```php +try { + $safe = $escaper->escHtmlAttr($value); +} catch (EncodingConversionException $e) { + $safe = ''; +} +``` + +Most callers will want the exception. If you were silently corrupting +output before, you will now see the error. + +## 4. `Esc::esc()` on arrays now keeps the `$encoding` argument + +This is a bug fix. In 1.x, `Esc::esc(['x' => $v], 'html', +'iso-8859-1')` recursed into the array and called itself **without +the encoding**, so every nested value escaped as UTF-8 regardless of +the third argument. 2.x propagates the encoding correctly. + +If you were depending on the bug (i.e. 
you passed an encoding but +expected UTF-8 for nested values), drop the encoding argument: + +```diff +-Esc::esc($payload, 'html', 'iso-8859-1'); ++Esc::esc($payload, 'html'); +``` + +## 5. C1 control characters in multibyte UTF-8 + +`escHtmlAttr` always replaced single-byte C0/C1 controls with the +Unicode replacement character (`U+FFFD`). In 1.x the replacement only +fired against the **first byte** of a multibyte sequence, so +`U+0080`–`U+009F` in their proper 2-byte UTF-8 form (`\xC2\x80` … +`\xC2\x9F`) survived as numeric character references (`&#x80;` … +`&#x9F;`) instead of being replaced. + +2.x catches both forms. The output for those exact code points +changed from `&#x80;` etc. to `&#xFFFD;`. Both are XSS-safe; if you +were diffing output byte-for-byte across versions, expect this drift. + +## 6. `Esc::esc()` cache is now actually effective + +Not a BC break, but worth knowing: in 1.x the static cache rebuilt the +`Escaper` on every call when `$encoding === null`. 2.x caches per +encoding. No code change needed — your default-encoding calls just got +faster. + +## 7. Examples directory removed + +The runnable PHP files under `Examples/` are gone. The same scenarios +live under [`docs/`](./docs) with each output verified by running the +escaper itself. If you scripted against the example file paths, point +your tooling at `docs/` instead. + +## 8. Static analysis & coding-standard tooling (dev only) + +If you have a fork or downstream patches, note that 2.x adds: + +- `phpstan.neon.dist` (level `max`, zero errors) +- `.php-cs-fixer.dist.php` (`@PSR12 + @PHP74Migration`) + +Your local changes should pass `composer ci` before being submitted as +PRs. + +## Summary checklist + +- [ ] `ext-mbstring` available in every environment. +- [ ] `catch (\Exception)` → `catch (EscaperException)` (optional). +- [ ] Handle `EncodingConversionException` if you used to rely on the + silent empty-string fallback. +- [ ] Drop redundant `$encoding` arguments that depended on the + recursion bug. 
+- [ ] Re-run any byte-for-byte output snapshots that include the + `U+0080`–`U+009F` range. diff --git a/composer.json b/composer.json index ad53f95..663399d 100644 --- a/composer.json +++ b/composer.json @@ -1,13 +1,20 @@ { "name": "initphp/escaper", - "description": "InitPHP Escaper Class", + "description": "Context-aware output escaper (HTML, attribute, JavaScript, CSS, URL) for safely rendering untrusted user input.", "type": "library", "license": "MIT", - "autoload": { - "psr-4": { - "InitPHP\\Escaper\\": "src/" - } - }, + "keywords": [ + "escaper", + "escape", + "xss", + "security", + "html", + "javascript", + "css", + "url", + "output-encoding", + "owasp" + ], "authors": [ { "name": "Muhammet ŞAFAK", @@ -16,9 +23,59 @@ "homepage": "https://www.muhammetsafak.com.tr" } ], - "minimum-stability": "stable", + "support": { + "issues": "https://github.com/InitPHP/Escaper/issues", + "source": "https://github.com/InitPHP/Escaper", + "docs": "https://github.com/InitPHP/Escaper/tree/main/docs" + }, "require": { "php": ">=7.4", - "ext-ctype": "*" + "ext-ctype": "*", + "ext-mbstring": "*" + }, + "require-dev": { + "phpunit/phpunit": "^9.6 || ^10.5 || ^11.5", + "phpstan/phpstan": "^1.12 || ^2.1", + "friendsofphp/php-cs-fixer": "^3.65" + }, + "suggest": { + "ext-iconv": "Preferred for encoding conversion; iconv is tried before mbstring." 
+ }, + "autoload": { + "psr-4": { + "InitPHP\\Escaper\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "InitPHP\\Escaper\\Tests\\": "tests/" + } + }, + "scripts": { + "test": "phpunit", + "test-coverage": "phpunit --coverage-html build/coverage", + "stan": "phpstan analyse --no-progress", + "cs-check": "php-cs-fixer fix --dry-run --diff", + "cs-fix": "php-cs-fixer fix", + "ci": [ + "@cs-check", + "@stan", + "@test" + ] + }, + "scripts-descriptions": { + "test": "Run the PHPUnit test suite.", + "test-coverage": "Run PHPUnit and produce an HTML coverage report under build/coverage.", + "stan": "Run PHPStan at the level configured in phpstan.neon.dist.", + "cs-check": "Report any PHP-CS-Fixer violations without modifying files.", + "cs-fix": "Apply PHP-CS-Fixer fixes in-place.", + "ci": "Run the full CI bundle locally: cs-check, stan, test." + }, + "minimum-stability": "stable", + "extra": { + "branch-alias": { + "dev-2.x": "2.0.x-dev", + "dev-main": "2.0.x-dev" + } } } diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..3167a53 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,33 @@ +# initphp/escaper — Documentation + +This directory is the developer reference for `initphp/escaper`. The +top-level [README](../README.md) is intentionally short; everything in +depth lives here. + +## Index + +1. [Getting started](getting-started.md) — install, first call, the `Esc` + facade vs. instantiating `Escaper`. +2. **Per-context guides** — one file per output context, with the rules + the escaper applies, the threats it defeats, and runnable examples: + - [HTML body context](context-html.md) (`escHtml`) + - [HTML attribute context](context-html-attribute.md) (`escHtmlAttr`) + - [JavaScript context](context-javascript.md) (`escJs`) + - [CSS context](context-css.md) (`escCss`) + - [URL context](context-url.md) (`escUrl`) +3. [Encodings](encodings.md) — non-UTF-8 input/output, the supported list + and how conversion is performed. +4. 
[Exceptions](exceptions.md) — the exception tree and when each one + is thrown. +5. [Security notes](security-notes.md) — caveats, common misuses, and + pointers to authoritative sources. + +## Conventions used in these docs + +- Code samples assume the autoloader has already been required. +- Output shown in `// comments` is the literal string the escaper + returns. Each sample was generated by running the escaper itself, not + hand-written. +- "Untrusted" means any data that has touched the network, the + filesystem, a database, or anything else outside your PHP process — + in other words, "almost everything". diff --git a/docs/context-css.md b/docs/context-css.md new file mode 100644 index 0000000..ba47c91 --- /dev/null +++ b/docs/context-css.md @@ -0,0 +1,59 @@ +# CSS context (`escCss`) + +> Use when the value lands inside a CSS property value: +> `color: HERE;`, `background-image: url(HERE);`, ``. + +## What it does + +`escCss` whitelists `[A-Za-z0-9]`. Every other character is rewritten as +the CSS escape sequence `\HEX `, with the **mandatory trailing space** +that terminates the escape. + +The trailing space looks redundant when followed by another character, +but CSS uses it as a delimiter — without it, the parser would eat +hex-digit-looking characters that follow the escape. Always emit it. 
+ +## Example — preventing a `` breakout + +```php +use InitPHP\Escaper\Esc; + +$untrusted = ''; + +echo Esc::esc($untrusted, 'css'); +// \3C \2F style\3E \3C script\3E alert\28 1\29 \3C \2F script\3E +``` + +The `<`, `>`, `/`, `(`, `)`, and spaces all turn into CSS escapes, so +the attacker cannot close the `'; + $output = $this->escaper->escCss($input); + + self::assertSame( + '\\3C \\2F style\\3E \\3C script\\3E alert\\28 1\\29 \\3C \\2F script\\3E ', + $output + ); + } + + public function testBmpMultibyteCharacterBecomesHexEscape(): void + { + // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş) + self::assertSame('\\15F ', $this->escaper->escCss('ş')); + } + + public function testSupplementaryPlaneCharacterBecomesHexEscape(): void + { + // U+1F680 → 1F680 in hex. + self::assertSame('\\1F680 ', $this->escaper->escCss('🚀')); + } +} diff --git a/tests/EscaperEncodingTest.php b/tests/EscaperEncodingTest.php new file mode 100644 index 0000000..9aaf639 --- /dev/null +++ b/tests/EscaperEncodingTest.php @@ -0,0 +1,112 @@ +getEncoding()); + } + + public function testNullEncodingResolvesToUtf8(): void + { + self::assertSame('utf-8', (new Escaper(null))->getEncoding()); + } + + public function testEmptyStringEncodingResolvesToUtf8(): void + { + self::assertSame('utf-8', (new Escaper(''))->getEncoding()); + } + + public function testEncodingLookupIsCaseInsensitive(): void + { + self::assertSame('utf-8', (new Escaper('UTF-8'))->getEncoding()); + self::assertSame('windows-1252', (new Escaper('Windows-1252'))->getEncoding()); + self::assertSame('iso-8859-1', (new Escaper('ISO-8859-1'))->getEncoding()); + } + + public function testUnsupportedEncodingThrows(): void + { + $this->expectException(EncodingNotSupportedException::class); + $this->expectExceptionMessage('Encoding "utf-16" is not supported.'); + + new Escaper('utf-16'); + } + + public function testEncodingExceptionIsAnEscaperException(): void + { + try { + new Escaper('not-a-real-encoding'); + self::fail('Expected 
EncodingNotSupportedException'); + } catch (EscaperException $e) { + self::assertInstanceOf(EncodingNotSupportedException::class, $e); + } + } + + public function testNonUtf8InputIsConvertedThenEscaped(): void + { + $escaper = new Escaper('iso-8859-1'); + + // ISO-8859-1 byte 0xE9 == "é". When fed in as a single byte the + // escaper must first decode it to UTF-8, then re-encode the output + // back to ISO-8859-1. + $output = $escaper->escHtml("\xE9"); + + // htmlspecialchars receives 'é' in UTF-8 and leaves it alone, but + // returns it encoded back to ISO-8859-1 → 0xE9. + self::assertSame("\xE9", $output); + } + + public function testInvalidUtf8InAttributeContextThrows(): void + { + $this->expectException(InvalidUtf8Exception::class); + + // 0xC3 0x28 is a broken 2-byte sequence. + (new Escaper())->escHtmlAttr("\xC3\x28"); + } + + public function testWindows1252RoundTripThroughAttributeContext(): void + { + $escaper = new Escaper('windows-1252'); + + // 0xE9 == "é" in windows-1252. It is outside the attribute whitelist + // so the matcher must produce a numeric entity. The output reaches + // the caller after a UTF-8 → windows-1252 conversion back. + self::assertSame('é', $escaper->escHtmlAttr("\xE9")); + } + + public function testWindows1252RoundTripThroughJsContext(): void + { + $escaper = new Escaper('windows-1252'); + + // 0xE9 == "é". In the JS context the matcher emits é. + self::assertSame('\\u00E9', $escaper->escJs("\xE9")); + } + + public function testWindows1252RoundTripThroughCssContext(): void + { + $escaper = new Escaper('windows-1252'); + + // 0xE9 == "é". In the CSS context the matcher emits "\E9 ". 
+ self::assertSame('\\E9 ', $escaper->escCss("\xE9")); + } + + public function testForwardConversionFailureRaisesException(): void + { + $this->expectException(EncodingConversionException::class); + $this->expectExceptionMessage('Failed to convert string from "windows-1252" to "UTF-8".'); + + // 0x81 is an undefined byte in windows-1252 — iconv returns false. + (new Escaper('windows-1252'))->escHtmlAttr("\x81"); + } +} diff --git a/tests/EscaperHtmlAttrTest.php b/tests/EscaperHtmlAttrTest.php new file mode 100644 index 0000000..721af03 --- /dev/null +++ b/tests/EscaperHtmlAttrTest.php @@ -0,0 +1,99 @@ +escaper = new Escaper(); + } + + public function testEmptyStringShortCircuits(): void + { + self::assertSame('', $this->escaper->escHtmlAttr('')); + } + + public function testDigitsOnlyShortCircuits(): void + { + self::assertSame('12345', $this->escaper->escHtmlAttr('12345')); + } + + public function testWhitelistCharactersPassThrough(): void + { + self::assertSame( + 'abc,XYZ.-_0123', + $this->escaper->escHtmlAttr('abc,XYZ.-_0123') + ); + } + + public function testQuotelessAttributeInjectionVector(): void + { + $input = 'faketitle onmouseover=alert(/InitPHP!/);'; + + self::assertSame( + 'faketitle onmouseover=alert(/InitPHP!/);', + $this->escaper->escHtmlAttr($input) + ); + } + + public function testNamedEntitiesPreferredOverNumericForms(): void + { + self::assertSame('"', $this->escaper->escHtmlAttr('"')); + self::assertSame('&', $this->escaper->escHtmlAttr('&')); + self::assertSame('<', $this->escaper->escHtmlAttr('<')); + self::assertSame('>', $this->escaper->escHtmlAttr('>')); + } + + public function testControlCharactersBecomeReplacementCharacter(): void + { + // 0x00, 0x01, 0x1B all fall into the C0 range and must not survive. 
+ self::assertSame('�', $this->escaper->escHtmlAttr("\x00")); + self::assertSame('�', $this->escaper->escHtmlAttr("\x01")); + self::assertSame('�', $this->escaper->escHtmlAttr("\x1B")); + } + + public function testTabLineFeedAndCarriageReturnAreEscapedNotReplaced(): void + { + // Tab/LF/CR are explicitly exempted from the replacement rule. + self::assertSame(' ', $this->escaper->escHtmlAttr("\t")); + self::assertSame(' ', $this->escaper->escHtmlAttr("\n")); + self::assertSame(' ', $this->escaper->escHtmlAttr("\r")); + } + + public function testC1ControlsBecomeReplacementCharacter(): void + { + // U+007F DEL (single-byte UTF-8). + self::assertSame('�', $this->escaper->escHtmlAttr("\x7F")); + // U+0080 PADDING CHARACTER (multibyte UTF-8: 0xC2 0x80). + self::assertSame('�', $this->escaper->escHtmlAttr("\xC2\x80")); + // U+009F APPLICATION PROGRAM COMMAND (multibyte UTF-8: 0xC2 0x9F). + self::assertSame('�', $this->escaper->escHtmlAttr("\xC2\x9F")); + } + + public function testU00A0IsEscapedNotReplaced(): void + { + // U+00A0 NO-BREAK SPACE sits just outside the C1 range and must be + // escaped as a normal character, not replaced. + self::assertSame(' ', $this->escaper->escHtmlAttr("\xC2\xA0")); + } + + public function testBmpMultibyteCharacterUsesFourDigitHex(): void + { + // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş) + self::assertSame('ş', $this->escaper->escHtmlAttr('ş')); + } + + public function testSupplementaryPlaneCharacterEmitsFullHex(): void + { + // U+1F680 ROCKET — beyond the BMP. 
+ self::assertSame('🚀', $this->escaper->escHtmlAttr('🚀')); + } +} diff --git a/tests/EscaperHtmlTest.php b/tests/EscaperHtmlTest.php new file mode 100644 index 0000000..966260d --- /dev/null +++ b/tests/EscaperHtmlTest.php @@ -0,0 +1,65 @@ +escaper = new Escaper(); + } + + public function testEscapesAngleBracketsAndQuotes(): void + { + $input = ''; + $output = $this->escaper->escHtml($input); + + self::assertSame( + '<script>alert("xss")</script>', + $output + ); + } + + public function testEscapesSingleQuoteWithEntQuotes(): void + { + self::assertSame(''', $this->escaper->escHtml("'")); + } + + public function testEscapesAmpersand(): void + { + self::assertSame('Tom & Jerry', $this->escaper->escHtml('Tom & Jerry')); + } + + public function testEmptyStringReturnsEmptyString(): void + { + self::assertSame('', $this->escaper->escHtml('')); + } + + public function testPlainAsciiPassesThroughUnchanged(): void + { + self::assertSame('Hello, world!', $this->escaper->escHtml('Hello, world!')); + } + + public function testMultibyteCharactersPassThroughInUtf8(): void + { + // htmlspecialchars only touches &, <, >, ", ' — multibyte stays. + self::assertSame('Merhaba dünya — şŞıİğĞ', $this->escaper->escHtml('Merhaba dünya — şŞıİğĞ')); + } + + public function testInvalidByteSequenceIsReplacedNotDropped(): void + { + // ENT_SUBSTITUTE replaces malformed UTF-8 with U+FFFD instead of + // returning an empty string (the unsafe ENT_IGNORE behaviour). 
+ $invalid = "\xC3\x28"; // invalid 2-byte sequence + $output = $this->escaper->escHtml($invalid); + + self::assertNotSame('', $output); + } +} diff --git a/tests/EscaperJsTest.php b/tests/EscaperJsTest.php new file mode 100644 index 0000000..42a9c22 --- /dev/null +++ b/tests/EscaperJsTest.php @@ -0,0 +1,63 @@ +escaper = new Escaper(); + } + + public function testEmptyStringShortCircuits(): void + { + self::assertSame('', $this->escaper->escJs('')); + } + + public function testDigitsOnlyShortCircuits(): void + { + self::assertSame('98765', $this->escaper->escJs('98765')); + } + + public function testWhitelistCharactersPassThrough(): void + { + self::assertSame('abc,XYZ._0', $this->escaper->escJs('abc,XYZ._0')); + } + + public function testEntityBasedInjectionVectorIsEscaped(): void + { + $input = 'bar"; alert("Hello!"); var xss="true'; + + self::assertSame( + 'bar\\x26quot\\x3B\\x3B\\x20alert\\x28\\x26quot\\x3BHello\\x21\\x26quot\\x3B\\x29\\x3B\\x20var\\x20xss\\x3D\\x26quot\\x3Btrue', + $this->escaper->escJs($input) + ); + } + + public function testSingleByteSpecialCharsBecomeHexEscapes(): void + { + self::assertSame('\\x20', $this->escaper->escJs(' ')); + self::assertSame('\\x22', $this->escaper->escJs('"')); + self::assertSame('\\x2F', $this->escaper->escJs('/')); + self::assertSame('\\x3C', $this->escaper->escJs('<')); + } + + public function testBmpMultibyteCharacterBecomesUnicodeEscape(): void + { + // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş) + self::assertSame('\\u015F', $this->escaper->escJs('ş')); + } + + public function testSupplementaryPlaneCharacterBecomesSurrogatePair(): void + { + // U+1F680 → high surrogate D83D + low surrogate DE80 + self::assertSame('\\uD83D\\uDE80', $this->escaper->escJs('🚀')); + } +} diff --git a/tests/EscaperUrlTest.php b/tests/EscaperUrlTest.php new file mode 100644 index 0000000..043fd51 --- /dev/null +++ b/tests/EscaperUrlTest.php @@ -0,0 +1,45 @@ +escaper = new Escaper(); + } + + public function 
testEmptyStringReturnsEmptyString(): void + { + self::assertSame('', $this->escaper->escUrl('')); + } + + public function testRfc3986UnreservedCharactersAreNotEncoded(): void + { + self::assertSame('Hello.world-1_2~3', $this->escaper->escUrl('Hello.world-1_2~3')); + } + + public function testSpaceIsPercentEncodedAsPercent20(): void + { + // rawurlencode (RFC 3986) — not "+" like urlencode. + self::assertSame('foo%20bar', $this->escaper->escUrl('foo bar')); + } + + public function testJavascriptInjectionVectorIsPercentEncoded(): void + { + $input = '" onmouseover="alert(\'hello\')'; + $output = $this->escaper->escUrl($input); + + self::assertSame( + '%22%20onmouseover%3D%22alert%28%27hello%27%29', + $output + ); + } +} diff --git a/tests/ExceptionHierarchyTest.php b/tests/ExceptionHierarchyTest.php new file mode 100644 index 0000000..dac7936 --- /dev/null +++ b/tests/ExceptionHierarchyTest.php @@ -0,0 +1,60 @@ +getParentClass(); + self::assertNotFalse($parent, \sprintf('Class "%s" must have a parent.', $class)); + + return $parent->getName(); + } +}