diff --git a/.gitattributes b/.gitattributes
index 6e5411e..7f5d248 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,10 @@
-.gitignore export-ignore
-.gitattributes export-ignore
-/Examples export-ignore
\ No newline at end of file
+# Exclude from Composer dist tarballs.
+/.github export-ignore
+/.gitattributes export-ignore
+/.gitignore export-ignore
+/.php-cs-fixer.dist.php export-ignore
+/.phpunit.cache export-ignore
+/docs export-ignore
+/phpstan.neon.dist export-ignore
+/phpunit.xml.dist export-ignore
+/tests export-ignore
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..fefbebf
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,139 @@
+name: CI
+
+on:
+ push:
+ branches: [main, "*.x"]
+ pull_request:
+ branches: [main, "*.x"]
+
+jobs:
+ validate:
+ name: composer validate
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: shivammathur/setup-php@v2
+ with:
+ php-version: "8.2"
+ coverage: none
+ tools: composer:v2
+ - run: composer validate --strict
+
+ cs:
+ name: PHP-CS-Fixer
+ runs-on: ubuntu-latest
+ needs: validate
+ steps:
+ - uses: actions/checkout@v4
+ - uses: shivammathur/setup-php@v2
+ with:
+ php-version: "8.2"
+ extensions: ctype, mbstring, iconv
+ coverage: none
+ tools: composer:v2
+ - name: Install dependencies
+ run: composer install --prefer-dist --no-progress --no-interaction
+ - name: Check coding standards
+ run: composer cs-check
+
+ stan:
+ name: PHPStan
+ runs-on: ubuntu-latest
+ needs: validate
+ steps:
+ - uses: actions/checkout@v4
+ - uses: shivammathur/setup-php@v2
+ with:
+ php-version: "8.2"
+ extensions: ctype, mbstring, iconv
+ coverage: none
+ tools: composer:v2
+ - name: Install dependencies
+ run: composer install --prefer-dist --no-progress --no-interaction
+ - name: Run PHPStan
+ run: composer stan
+
+ tests:
+ name: PHPUnit (PHP ${{ matrix.php }}, ${{ matrix.deps }})
+ runs-on: ubuntu-latest
+ needs: validate
+ strategy:
+ fail-fast: false
+ matrix:
+ php: ["7.4", "8.0", "8.1", "8.2", "8.3", "8.4"]
+ deps: ["highest"]
+ include:
+ - php: "7.4"
+ deps: "lowest"
+ - php: "8.4"
+ deps: "lowest"
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up PHP ${{ matrix.php }}
+ uses: shivammathur/setup-php@v2
+ with:
+ php-version: ${{ matrix.php }}
+ extensions: ctype, mbstring, iconv
+ coverage: none
+ tools: composer:v2
+
+ - name: Validate composer.json
+ run: composer validate --no-check-publish
+
+ - name: Get composer cache directory
+ id: composer-cache
+ run: echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT"
+
+ - name: Cache composer dependencies
+ uses: actions/cache@v4
+ with:
+ path: ${{ steps.composer-cache.outputs.dir }}
+ key: composer-${{ matrix.php }}-${{ matrix.deps }}-${{ hashFiles('**/composer.json') }}
+ restore-keys: composer-${{ matrix.php }}-${{ matrix.deps }}-
+
+ - name: Install highest dependencies
+ if: matrix.deps == 'highest'
+ run: composer update --prefer-dist --no-progress --no-interaction
+
+ - name: Install lowest dependencies
+ if: matrix.deps == 'lowest'
+ run: composer update --prefer-dist --no-progress --no-interaction --prefer-lowest --prefer-stable
+
+ - name: Run PHPUnit
+ run: vendor/bin/phpunit
+
+ coverage:
+ name: Coverage
+ runs-on: ubuntu-latest
+ needs: tests
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up PHP
+ uses: shivammathur/setup-php@v2
+ with:
+ php-version: "8.2"
+ extensions: ctype, mbstring, iconv
+ coverage: pcov
+ tools: composer:v2
+
+ - name: Install dependencies
+ run: composer install --prefer-dist --no-progress --no-interaction
+
+ - name: Run PHPUnit with coverage
+ run: vendor/bin/phpunit --coverage-clover=coverage.xml
+
+ - name: Upload coverage artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-clover
+ path: coverage.xml
+ retention-days: 14
+
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v5
+ with:
+ files: ./coverage.xml
+ flags: phpunit
+ fail_ci_if_error: false
diff --git a/.gitignore b/.gitignore
index 0abe7ad..8bb648c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,8 @@
/.vscode/
/.vs/
/vendor/
-/composer.lock
\ No newline at end of file
+/composer.lock
+/build/
+/.phpunit.cache/
+/.phpunit.result.cache
+/.php-cs-fixer.cache
diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php
new file mode 100644
index 0000000..7deb8da
--- /dev/null
+++ b/.php-cs-fixer.dist.php
@@ -0,0 +1,32 @@
+<?php
+
+declare(strict_types=1);
+
+// Files PHP-CS-Fixer inspects: every *.php file under src/ and tests/.
+$finder = PhpCsFixer\Finder::create()
+    ->in([__DIR__ . '/src', __DIR__ . '/tests'])
+    ->name('*.php');
+
+// Rule set: PSR-12 plus the PHP 7.4 migration sets (risky variants enabled),
+// with a handful of project-specific rules layered on top.
+return (new PhpCsFixer\Config())
+    ->setRiskyAllowed(true)
+    ->setRules([
+        '@PSR12' => true,
+        '@PSR12:risky' => true,
+        '@PHP74Migration' => true,
+        '@PHP74Migration:risky' => true,
+        'array_syntax' => ['syntax' => 'short'],
+        'declare_strict_types' => true,
+        'native_function_invocation' => [
+            'include' => ['@compiler_optimized'],
+            'scope' => 'namespaced',
+            'strict' => true,
+        ],
+        'no_unused_imports' => true,
+        'ordered_imports' => [
+            'imports_order' => ['class', 'function', 'const'],
+            'sort_algorithm' => 'alpha',
+        ],
+        'single_quote' => true,
+        'trailing_comma_in_multiline' => ['elements' => ['arrays']],
+    ])
+    ->setFinder($finder)
+    // The cache lives under build/, which .gitignore excludes.
+    ->setCacheFile(__DIR__ . '/build/php-cs-fixer.cache');
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..3ad7245
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,85 @@
+# Changelog
+
+All notable changes to `initphp/escaper` are documented here.
+
+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [2.0.0]
+
+A reliability- and correctness-focused major release. Several bug fixes
+in this release are visible from the outside, so the version bump is
+necessary even though the public surface is unchanged. See
+[`UPGRADE-2.0.md`](./UPGRADE-2.0.md) for a step-by-step migration guide.
+
+### Added
+
+- Dedicated exception hierarchy under `InitPHP\Escaper\Exception\`:
+ `EscaperException` (base, extends `\RuntimeException`),
+ `EncodingNotSupportedException`, `EncodingConversionException`,
+ `InvalidContextException`, `InvalidUtf8Exception`.
+- Full PHPUnit test suite under `tests/` (62 tests, 100 assertions,
+ ~91% line coverage).
+- GitHub Actions CI: `composer validate`, PHP-CS-Fixer, PHPStan (max),
+ PHPUnit on PHP 7.4 – 8.4, coverage upload.
+- PHPStan configuration at the `max` level with zero reported issues.
+- PHP-CS-Fixer configuration based on `@PSR12` and
+ `@PHP74Migration` rule sets.
+- Developer documentation under `docs/` covering each escape context,
+ encoding handling, exceptions and security notes.
+- `Esc::reset()` helper to clear the memoised `Escaper` cache (used by
+ tests; useful when the calling code wants to drop cached instances).
+- `composer.json` scripts: `test`, `test-coverage`, `stan`, `cs-check`,
+ `cs-fix`, `ci`.
+
+### Changed
+
+- **`Esc::esc()` recursion** now propagates `$encoding` into recursive
+ calls. Previously the encoding was dropped on every inner call,
+ silently defaulting to UTF-8 for nested arrays.
+- **`Esc::esc()` instance cache** is now keyed by encoding. The previous
+ cache compared `$escaper->getEncoding()` (`'utf-8'`) against the raw
+ `$encoding` argument (often `null`), so the cache rebuilt on every
+ default call.
+- **`Escaper` constructor** raises `EncodingNotSupportedException`
+ instead of `\Exception`. (Still catchable via `\Exception` /
+ `\RuntimeException`.)
+- **`HTML attribute` matcher** evaluates the C0/C1 control-character
+ check against the decoded code point, so `U+0080`–`U+009F` are now
+ correctly replaced with `U+FFFD` when they arrive in multibyte UTF-8
+ form. Previously only single-byte controls were caught.
+- **`composer.json`** now requires `ext-mbstring`. `ext-iconv` remains
+ optional and is preferred when present (`suggest` entry added).
+- **PHPDoc blocks** rewritten across the package to reflect the actual
+ code behaviour.
+
+### Fixed
+
+- **Silent data loss on encoding-conversion failure.** When `iconv` /
+ `mb_convert_encoding` returned `false`, `Escaper::convertEncoding()`
+ previously returned an empty string and let it propagate, masking
+ real failures. It now raises `EncodingConversionException`.
+- **`isUtf8()`** uses explicit `=== 1` comparison against
+ `preg_match()` instead of relying on PHP's loose type coercion in a
+ `bool` return.
+- **Misleading error message** in `convertEncoding()`: the "MB_String
+ plugin is required" text appeared even when iconv was tried first.
+ Replaced with "Either ext-iconv or ext-mbstring is required".
+- **Unused callable properties** (`$htmlAttrMatcher`, `$jsMatcher`,
+ `$cssMatcher`) removed. The matchers are now passed inline to
+ `preg_replace_callback`.
+
+### Removed
+
+- **`Examples/`** directory removed. The same scenarios are documented
+ under [`docs/`](./docs) with verified output for each example.
+
+## [1.0]
+
+Initial release.
+
+[Unreleased]: https://github.com/InitPHP/Escaper/compare/2.0.0...HEAD
+[2.0.0]: https://github.com/InitPHP/Escaper/compare/1.0...2.0.0
+[1.0]: https://github.com/InitPHP/Escaper/releases/tag/1.0
diff --git a/Examples/Attr.php b/Examples/Attr.php
deleted file mode 100644
index 1d5e62d..0000000
--- a/Examples/Attr.php
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
- Quoteless Attribute
-
-
-
-
-
- ?>
- >
- Hello World
-
-
-
-
\ No newline at end of file
diff --git a/Examples/Css.php b/Examples/Css.php
deleted file mode 100644
index bf5443e..0000000
--- a/Examples/Css.php
+++ /dev/null
@@ -1,28 +0,0 @@
-');
-}
-INPUT;
-?>
-
-
-
- Escaped CSS
-
-
-
-
-User controlled CSS needs to be properly escaped!
-
-
\ No newline at end of file
diff --git a/Examples/Html.php b/Examples/Html.php
deleted file mode 100644
index be8bad7..0000000
--- a/Examples/Html.php
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
- Encodings set correctly!
-
-
-
-alert("initphp")';
-
-// <script>alert("initphp")</script>
-echo Esc::esc($input, 'html');
-
-?>
-
\ No newline at end of file
diff --git a/Examples/Js.php b/Examples/Js.php
deleted file mode 100644
index 6156d03..0000000
--- a/Examples/Js.php
+++ /dev/null
@@ -1,24 +0,0 @@
-
-
-
-
- Escaped Entities
-
-
-
-
-Hello World
-
-
\ No newline at end of file
diff --git a/Examples/Url.php b/Examples/Url.php
deleted file mode 100644
index e8654a2..0000000
--- a/Examples/Url.php
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
- Unescaped URL data
-
-
-
-
-Click here!
-
-
\ No newline at end of file
diff --git a/README.md b/README.md
index 5c8e332..6133c91 100644
--- a/README.md
+++ b/README.md
@@ -1,180 +1,156 @@
-# InitPHP Escaper
+# initphp/escaper
-Securely and safely escape HTML, HTML attributes, JavaScript, CSS, and URLs.
+Context-aware output escaper for PHP. Safely render untrusted user input inside
+HTML, HTML attributes, JavaScript, CSS and URLs.
-[](https://packagist.org/packages/initphp/escaper) [](https://packagist.org/packages/initphp/escaper) [](https://packagist.org/packages/initphp/escaper) [](https://packagist.org/packages/initphp/escaper) [](https://packagist.org/packages/initphp/escaper)
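+[![Latest Stable Version](https://img.shields.io/packagist/v/initphp/escaper.svg)](https://packagist.org/packages/initphp/escaper)
+[![Total Downloads](https://img.shields.io/packagist/dt/initphp/escaper.svg)](https://packagist.org/packages/initphp/escaper)
+[![CI](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml/badge.svg)](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml)
+[![Coverage](https://codecov.io/gh/InitPHP/Escaper/graph/badge.svg)](https://codecov.io/gh/InitPHP/Escaper)
+[![License](https://img.shields.io/packagist/l/initphp/escaper.svg)](https://packagist.org/packages/initphp/escaper)
+[![PHP Version](https://img.shields.io/packagist/php-v/initphp/escaper.svg)](https://packagist.org/packages/initphp/escaper)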
-## Requirements
+`htmlspecialchars()` is not enough on its own. Each output context — an HTML
+body, an attribute, a JavaScript string literal, a CSS value, a URL parameter
+— needs its own escaping rules, and using the wrong one can leave you exposed
+to XSS even when you *think* you have escaped your data.
-- PHP 7.4 or higher
-- PHP _CType_ Extension
-- PHP _MB_String_ or _Iconv_ Extension
+`initphp/escaper` implements the rules from the
+[OWASP XSS Prevention Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html)
+for the five most common contexts, behind a small, dependency-free API.
## Installation
-```php
+```bash
composer require initphp/escaper
```
-## Usage
+### Requirements
-`\InitPHP\Escaper\Esc::esc()` :
+- PHP 7.4 or newer
+- `ext-ctype`
+- `ext-mbstring` (required); `ext-iconv` is used when present and preferred
+ over mbstring
-```php
-public static function esc(string[]|string $data, string $context = 'html', ?string $encoding = null): array|string;
-```
+## Quick start
+
+```php
+use InitPHP\Escaper\Esc;
+
+echo Esc::esc('<script>alert(1)</script>');
+// &lt;script&gt;alert(1)&lt;/script&gt;
+
+echo Esc::esc('faketitle onmouseover=alert(1);', 'attr');
+// faketitle&#x20;onmouseover&#x3D;alert&#x28;1&#x29;&#x3B;
+
+echo Esc::esc('"; alert(1); var x="', 'js');
+// \x22\x3B\x20alert\x281\x29\x3B\x20var\x20x\x3D\x22
-- `$data` : The content to be cleared.
-- `$context` : The method to be used for cleaning. If the value is not one of the following; Throws `Exception`.
- - `html`
- - `js`
- - `css`
- - `url`
- - `attr`
-- `$encoding` : If the character set to be used is not specified or `NULL`; `UTF-8` is used by default.
-
-`html` Escaper Example :
-```php
-alert("initphp")';
-?>
-
-
-
- Encodings set correctly!
-
-
-
-
-
-
+echo Esc::esc('</style><script>alert(1)</script>', 'css');
+// \3C \2F style\3E \3C script\3E alert\28 1\29 \3C \2F script\3E
+
+echo Esc::esc('" onmouseover="alert(1)', 'url');
+// %22%20onmouseover%3D%22alert%281%29
```
-`attr` Escaper Example :
+`Esc::esc()` also accepts arrays and recurses into them, so escaping a whole
+request payload at the view boundary is a one-liner:
```php
-
-
-
-
- Quoteless Attribute
-
-
-
-
-
- ?>
- >
- Hello World
-
-
-
-
+$safe = Esc::esc($_GET, 'html');
```
-`Js` Escaper Example :
+## API
-```php
-
-
-
-
- Escaped Entities
-
-
-
-
-Hello World
-
-
+```php
+public static function esc(
+ array|string $data,
+ string $context = 'html',
+ ?string $encoding = null
+): array|string;
```
-`css` Escaper Example :
+| Argument | Description |
+| ----------- | --------------------------------------------------------------------------- |
+| `$data` | A string, or an array (which is escaped recursively). |
+| `$context` | `html`, `attr`, `js`, `css`, `url`, or `raw` (returns input unchanged). |
+| `$encoding` | Character encoding of both the input and the returned string. `null` resolves to UTF-8. See [Encodings](docs/encodings.md). |
+
+Throws `InitPHP\Escaper\Exception\InvalidContextException` for unknown contexts.
+
+### `Escaper`
+
+For lower-level use, instantiate `Escaper` directly. Each instance is bound to
+one encoding and exposes one method per context:
```php
-');
-}
-INPUT;
-?>
-
-
-
- Escaped CSS
-
-
-
-
-User controlled CSS needs to be properly escaped!
-
-
+use InitPHP\Escaper\Escaper;
+
+$escaper = new Escaper(); // utf-8
+$escaper = new Escaper('windows-1252');
+
+$escaper->escHtml($string);
+$escaper->escHtmlAttr($string);
+$escaper->escJs($string);
+$escaper->escCss($string);
+$escaper->escUrl($string);
```
-`url` Escaper Example :
+## Documentation
-```php
-
-
-
-
- Unescaped URL
-
-
-
-
-Click
-
-
+The [`docs/`](docs/) directory contains a per-context walkthrough with
+examples, do-and-don't guidance and security notes:
+
+- [Getting started](docs/getting-started.md)
+- [HTML body context](docs/context-html.md)
+- [HTML attribute context](docs/context-html-attribute.md)
+- [JavaScript context](docs/context-javascript.md)
+- [CSS context](docs/context-css.md)
+- [URL context](docs/context-url.md)
+- [Encodings](docs/encodings.md)
+- [Exceptions](docs/exceptions.md)
+- [Security notes](docs/security-notes.md)
+
+## A word of warning
+
+> Output escaping prevents XSS but it is not a substitute for input validation,
+> authentication, or authorisation. It is also context-sensitive: the
+> JavaScript escaper assumes the caller wraps the result in quotes, the HTML
+> attribute escaper assumes the value is used as a single attribute value, and
+> so on. Read the per-context docs before mixing contexts.
+
+## Contributing
+
+Contributions are welcome. Please read the
+[org-wide CONTRIBUTING guide](https://github.com/InitPHP/.github/blob/main/CONTRIBUTING.md)
+for the workflow, coding standards and test expectations.
+
+A typical loop is:
+
+```bash
+git clone https://github.com/InitPHP/Escaper.git
+cd Escaper
+composer install
+composer ci # cs-check + phpstan + phpunit
```
-## Credits
+Individual steps are also available:
+
+| Command | What it does |
+| ------------------ | ------------------------------------------- |
+| `composer test` | Run PHPUnit |
+| `composer stan` | Run PHPStan (max level) |
+| `composer cs-check`| Report PHP-CS-Fixer violations, no changes |
+| `composer cs-fix` | Apply PHP-CS-Fixer changes |
+
+## Security
-- [Muhammet ŞAFAK](https://www.muhammetsafak.com.tr) <>
+If you discover a security issue, please follow the disclosure process
+documented in [SECURITY.md](https://github.com/InitPHP/.github/blob/main/SECURITY.md)
+rather than opening a public issue.
## License
-Copyright © 2022 [MIT License](./LICENSE)
+Released under the [MIT License](./LICENSE). © InitPHP.
diff --git a/UPGRADE-2.0.md b/UPGRADE-2.0.md
new file mode 100644
index 0000000..891a1ed
--- /dev/null
+++ b/UPGRADE-2.0.md
@@ -0,0 +1,140 @@
+# Upgrading from 1.x to 2.0
+
+`initphp/escaper` 2.0 is a correctness release. The public API surface
+is unchanged — every 1.x method still exists with the same signature.
+What changed is **how the escaper signals failure** and **what happens
+in a few edge cases that were latent bugs in 1.x**.
+
+If your 1.x code only calls `Esc::esc()` or `Escaper::escHtml()` etc.
+in the happy path, you should be able to upgrade without code changes.
+The notes below cover the cases where you may need to act.
+
+## 1. New Composer requirement: `ext-mbstring`
+
+`composer.json` now declares `ext-mbstring` as a hard requirement
+(`ext-iconv` remains optional but is preferred when present). If your
+production image does not bundle mbstring you must add it:
+
+```Dockerfile
+RUN docker-php-ext-install mbstring
+```
+
+Or on a Debian/Ubuntu host:
+
+```bash
+apt-get install -y php-mbstring
+```
+
+## 2. Replace `catch (\Exception $e)` blocks (recommended)
+
+1.x threw a plain `\Exception`. 2.x ships a dedicated exception tree.
+Your existing `\Exception` (or `\Throwable`) catches still work because
+the new exceptions extend `\RuntimeException`, but you can now be
+specific:
+
+```diff
+ use InitPHP\Escaper\Esc;
++use InitPHP\Escaper\Exception\EscaperException;
+
+ try {
+ echo Esc::esc($value, 'attr');
+-} catch (\Exception $e) {
++} catch (EscaperException $e) {
+ // …
+ }
+```
+
+The full tree:
+
+```
+\RuntimeException
+ └─ InitPHP\Escaper\Exception\EscaperException
+ ├─ EncodingNotSupportedException // unsupported encoding constructor arg
+ ├─ EncodingConversionException // iconv/mbstring failure (NEW behaviour, see §3)
+ ├─ InvalidContextException // unknown context passed to Esc::esc()
+ └─ InvalidUtf8Exception // input is not / cannot be UTF-8
+```
+
+## 3. Encoding-conversion failure now throws (behavioural break)
+
+In 1.x, if `iconv` / `mb_convert_encoding` returned `false`, the
+escaper silently substituted an empty string and returned it. That
+silently destroyed data. 2.x raises `EncodingConversionException`
+instead.
+
+If you rely on the old "empty string on failure" behaviour, add an
+explicit `try`/`catch`:
+
+```php
+use InitPHP\Escaper\Exception\EncodingConversionException;
+
+try {
+ $safe = $escaper->escHtmlAttr($value);
+} catch (EncodingConversionException $e) {
+ $safe = '';
+}
+```
+
+Most callers will want the exception. If you were silently corrupting
+output before, you will now see the error.
+
+## 4. `Esc::esc()` on arrays now keeps the `$encoding` argument
+
+This is a bug fix. In 1.x, `Esc::esc(['x' => $v], 'html',
+'iso-8859-1')` recursed into the array and called itself **without
+the encoding**, so every nested value escaped as UTF-8 regardless of
+the third argument. 2.x propagates the encoding correctly.
+
+If you were depending on the bug (i.e. you passed an encoding but
+expected UTF-8 for nested values), drop the encoding argument:
+
+```diff
+-Esc::esc($payload, 'html', 'iso-8859-1');
++Esc::esc($payload, 'html');
+```
+
+## 5. C1 control characters in multibyte UTF-8
+
+`escHtmlAttr` always replaced single-byte C0/C1 controls with the
+Unicode replacement character (`U+FFFD`). In 1.x the replacement only
+fired against the **first byte** of a multibyte sequence, so
+`U+0080`–`U+009F` in their proper 2-byte UTF-8 form (`\xC2\x80` …
+`\xC2\x9F`) survived as numeric character references (`&#x80;` …
+`&#x9F;`) instead of being replaced.
+
+2.x catches both forms. The output for those exact code points
+changed from `&#x80;` etc. to `&#xFFFD;`. Both are XSS-safe; if you
+were diffing output byte-for-byte across versions, expect this drift.
+
+## 6. `Esc::esc()` cache is now actually effective
+
+Not a BC break, but worth knowing: in 1.x the static cache rebuilt the
+`Escaper` on every call when `$encoding === null`. 2.x caches per
+encoding. No code change needed — your default-encoding calls just got
+faster.
+
+## 7. Examples directory removed
+
+The runnable PHP files under `Examples/` are gone. The same scenarios
+live under [`docs/`](./docs) with each output verified by running the
+escaper itself. If you scripted against the example file paths, point
+your tooling at `docs/` instead.
+
+## 8. Static analysis & coding-standard tooling (dev only)
+
+If you have a fork or downstream patches, note that 2.x adds:
+
+- `phpstan.neon.dist` (level `max`, zero errors)
+- `.php-cs-fixer.dist.php` (`@PSR12 + @PHP74Migration`)
+
+Your local changes should pass `composer ci` before being submitted as
+PRs.
+
+## Summary checklist
+
+- [ ] `ext-mbstring` available in every environment.
+- [ ] `catch (\Exception)` → `catch (EscaperException)` (optional).
+- [ ] Handle `EncodingConversionException` if you used to rely on the
+ silent empty-string fallback.
+- [ ] Drop redundant `$encoding` arguments that depended on the
+ recursion bug.
+- [ ] Re-run any byte-for-byte output snapshots that include the
+ `U+0080`–`U+009F` range.
diff --git a/composer.json b/composer.json
index ad53f95..663399d 100644
--- a/composer.json
+++ b/composer.json
@@ -1,13 +1,20 @@
{
"name": "initphp/escaper",
- "description": "InitPHP Escaper Class",
+ "description": "Context-aware output escaper (HTML, attribute, JavaScript, CSS, URL) for safely rendering untrusted user input.",
"type": "library",
"license": "MIT",
- "autoload": {
- "psr-4": {
- "InitPHP\\Escaper\\": "src/"
- }
- },
+ "keywords": [
+ "escaper",
+ "escape",
+ "xss",
+ "security",
+ "html",
+ "javascript",
+ "css",
+ "url",
+ "output-encoding",
+ "owasp"
+ ],
"authors": [
{
"name": "Muhammet ŞAFAK",
@@ -16,9 +23,59 @@
"homepage": "https://www.muhammetsafak.com.tr"
}
],
- "minimum-stability": "stable",
+ "support": {
+ "issues": "https://github.com/InitPHP/Escaper/issues",
+ "source": "https://github.com/InitPHP/Escaper",
+ "docs": "https://github.com/InitPHP/Escaper/tree/main/docs"
+ },
"require": {
"php": ">=7.4",
- "ext-ctype": "*"
+ "ext-ctype": "*",
+ "ext-mbstring": "*"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^9.6 || ^10.5 || ^11.5",
+ "phpstan/phpstan": "^1.12 || ^2.1",
+ "friendsofphp/php-cs-fixer": "^3.65"
+ },
+ "suggest": {
+ "ext-iconv": "Preferred for encoding conversion; iconv is tried before mbstring."
+ },
+ "autoload": {
+ "psr-4": {
+ "InitPHP\\Escaper\\": "src/"
+ }
+ },
+ "autoload-dev": {
+ "psr-4": {
+ "InitPHP\\Escaper\\Tests\\": "tests/"
+ }
+ },
+ "scripts": {
+ "test": "phpunit",
+ "test-coverage": "phpunit --coverage-html build/coverage",
+ "stan": "phpstan analyse --no-progress",
+ "cs-check": "php-cs-fixer fix --dry-run --diff",
+ "cs-fix": "php-cs-fixer fix",
+ "ci": [
+ "@cs-check",
+ "@stan",
+ "@test"
+ ]
+ },
+ "scripts-descriptions": {
+ "test": "Run the PHPUnit test suite.",
+ "test-coverage": "Run PHPUnit and produce an HTML coverage report under build/coverage.",
+ "stan": "Run PHPStan at the level configured in phpstan.neon.dist.",
+ "cs-check": "Report any PHP-CS-Fixer violations without modifying files.",
+ "cs-fix": "Apply PHP-CS-Fixer fixes in-place.",
+ "ci": "Run the full CI bundle locally: cs-check, stan, test."
+ },
+ "minimum-stability": "stable",
+ "extra": {
+ "branch-alias": {
+ "dev-2.x": "2.0.x-dev",
+ "dev-main": "2.0.x-dev"
+ }
}
}
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..3167a53
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,33 @@
+# initphp/escaper — Documentation
+
+This directory is the developer reference for `initphp/escaper`. The
+top-level [README](../README.md) is intentionally short; everything in
+depth lives here.
+
+## Index
+
+1. [Getting started](getting-started.md) — install, first call, the `Esc`
+ facade vs. instantiating `Escaper`.
+2. **Per-context guides** — one file per output context, with the rules
+ the escaper applies, the threats it defeats, and runnable examples:
+ - [HTML body context](context-html.md) (`escHtml`)
+ - [HTML attribute context](context-html-attribute.md) (`escHtmlAttr`)
+ - [JavaScript context](context-javascript.md) (`escJs`)
+ - [CSS context](context-css.md) (`escCss`)
+ - [URL context](context-url.md) (`escUrl`)
+3. [Encodings](encodings.md) — non-UTF-8 input/output, the supported list
+ and how conversion is performed.
+4. [Exceptions](exceptions.md) — the exception tree and when each one
+ is thrown.
+5. [Security notes](security-notes.md) — caveats, common misuses, and
+ pointers to authoritative sources.
+
+## Conventions used in these docs
+
+- Code samples assume the autoloader has already been required.
+- Output shown in `// comments` is the literal string the escaper
+ returns. Each sample was generated by running the escaper itself, not
+ hand-written.
+- "Untrusted" means any data that has touched the network, the
+ filesystem, a database, or anything else outside your PHP process —
+ in other words, "almost everything".
diff --git a/docs/context-css.md b/docs/context-css.md
new file mode 100644
index 0000000..ba47c91
--- /dev/null
+++ b/docs/context-css.md
@@ -0,0 +1,59 @@
+# CSS context (`escCss`)
+
+> Use when the value lands inside a CSS property value:
+> `color: HERE;`, `background-image: url(HERE);`, `<style>…HERE…</style>`.
+
+## What it does
+
+`escCss` whitelists `[A-Za-z0-9]`. Every other character is rewritten as
+the CSS escape sequence `\HEX `, with the **mandatory trailing space**
+that terminates the escape.
+
+The trailing space looks redundant when followed by another character,
+but CSS uses it as a delimiter — without it, the parser would eat
+hex-digit-looking characters that follow the escape. Always emit it.
+
+## Example — preventing a `</style>` breakout
+
+```php
+use InitPHP\Escaper\Esc;
+
+$untrusted = '</style><script>alert(1)</script>';
+
+echo Esc::esc($untrusted, 'css');
+// \3C \2F style\3E \3C script\3E alert\28 1\29 \3C \2F script\3E
+```
+
+The `<`, `>`, `/`, `(`, `)`, and spaces all turn into CSS escapes, so
+the attacker cannot close the `';
+ $output = $this->escaper->escCss($input);
+
+ self::assertSame(
+ '\\3C \\2F style\\3E \\3C script\\3E alert\\28 1\\29 \\3C \\2F script\\3E ',
+ $output
+ );
+ }
+
+    /**
+     * A BMP character outside the CSS whitelist is emitted as a hex escape.
+     */
+    public function testBmpMultibyteCharacterBecomesHexEscape(): void
+    {
+        // "ş" is U+015F LATIN SMALL LETTER S WITH CEDILLA.
+        $escaped = $this->escaper->escCss('ş');
+
+        self::assertSame('\\15F ', $escaped);
+    }
+
+    /**
+     * A supplementary-plane character is emitted as one hex escape.
+     */
+    public function testSupplementaryPlaneCharacterBecomesHexEscape(): void
+    {
+        // "🚀" is U+1F680, i.e. 1F680 in hex.
+        $escaped = $this->escaper->escCss('🚀');
+
+        self::assertSame('\\1F680 ', $escaped);
+    }
+}
diff --git a/tests/EscaperEncodingTest.php b/tests/EscaperEncodingTest.php
new file mode 100644
index 0000000..9aaf639
--- /dev/null
+++ b/tests/EscaperEncodingTest.php
@@ -0,0 +1,112 @@
+getEncoding());
+ }
+
+    /**
+     * Passing null as the encoding yields an escaper that reports 'utf-8'.
+     */
+    public function testNullEncodingResolvesToUtf8(): void
+    {
+        $escaper = new Escaper(null);
+
+        self::assertSame('utf-8', $escaper->getEncoding());
+    }
+
+    /**
+     * Passing an empty string as the encoding yields an escaper that reports 'utf-8'.
+     */
+    public function testEmptyStringEncodingResolvesToUtf8(): void
+    {
+        $escaper = new Escaper('');
+
+        self::assertSame('utf-8', $escaper->getEncoding());
+    }
+
+    /**
+     * Encoding names are accepted in mixed case and reported in lower case.
+     */
+    public function testEncodingLookupIsCaseInsensitive(): void
+    {
+        // Constructor argument => value getEncoding() is expected to report.
+        $cases = [
+            'UTF-8' => 'utf-8',
+            'Windows-1252' => 'windows-1252',
+            'ISO-8859-1' => 'iso-8859-1',
+        ];
+
+        foreach ($cases as $given => $reported) {
+            self::assertSame($reported, (new Escaper($given))->getEncoding());
+        }
+    }
+
+    /**
+     * Constructing an escaper with an unsupported encoding name must throw
+     * EncodingNotSupportedException with a message naming that encoding.
+     */
+    public function testUnsupportedEncodingThrows(): void
+    {
+        $this->expectExceptionMessage('Encoding "utf-16" is not supported.');
+        $this->expectException(EncodingNotSupportedException::class);
+
+        $unsupported = 'utf-16';
+        new Escaper($unsupported);
+    }
+
+    /**
+     * The unsupported-encoding exception can be caught through the
+     * EscaperException base type.
+     */
+    public function testEncodingExceptionIsAnEscaperException(): void
+    {
+        $caught = null;
+
+        try {
+            new Escaper('not-a-real-encoding');
+        } catch (EscaperException $exception) {
+            $caught = $exception;
+        }
+
+        // Reaching this point without a caught exception means the constructor
+        // accepted the bogus encoding.
+        if ($caught === null) {
+            self::fail('Expected EncodingNotSupportedException');
+        }
+
+        self::assertInstanceOf(EncodingNotSupportedException::class, $caught);
+    }
+
+    /**
+     * Input in the escaper's configured (non-UTF-8) encoding comes back in
+     * that same encoding after HTML escaping.
+     */
+    public function testNonUtf8InputIsConvertedThenEscaped(): void
+    {
+        // 0xE9 is "é" in ISO-8859-1. The escaper is expected to decode the
+        // single byte to UTF-8, escape it (htmlspecialchars leaves "é"
+        // alone), and re-encode the result to ISO-8859-1 — i.e. 0xE9 again.
+        $latin1EAcute = "\xE9";
+        $escaper = new Escaper('iso-8859-1');
+
+        self::assertSame("\xE9", $escaper->escHtml($latin1EAcute));
+    }
+
+    /**
+     * Malformed UTF-8 passed to the attribute escaper must raise
+     * InvalidUtf8Exception.
+     */
+    public function testInvalidUtf8InAttributeContextThrows(): void
+    {
+        $this->expectException(InvalidUtf8Exception::class);
+
+        // 0xC3 0x28 is a broken 2-byte sequence.
+        $brokenSequence = "\xC3\x28";
+        $escaper = new Escaper();
+
+        $escaper->escHtmlAttr($brokenSequence);
+    }
+
+    /**
+     * A non-ASCII windows-1252 byte is emitted as a numeric character
+     * reference by the attribute escaper.
+     */
+    public function testWindows1252RoundTripThroughAttributeContext(): void
+    {
+        $escaper = new Escaper('windows-1252');
+
+        // 0xE9 == "é" in windows-1252. It is outside the attribute whitelist
+        // so the matcher must produce a numeric entity. The output reaches
+        // the caller after a UTF-8 → windows-1252 conversion back; the entity
+        // is pure ASCII, so that conversion leaves it unchanged.
+        self::assertSame('&#xE9;', $escaper->escHtmlAttr("\xE9"));
+    }
+
+    /**
+     * A non-ASCII windows-1252 byte is emitted as a \uXXXX escape by the
+     * JavaScript escaper.
+     */
+    public function testWindows1252RoundTripThroughJsContext(): void
+    {
+        // 0xE9 == "é". In the JS context the matcher emits \u00E9.
+        $escaped = (new Escaper('windows-1252'))->escJs("\xE9");
+
+        self::assertSame('\\u00E9', $escaped);
+    }
+
+    /**
+     * A non-ASCII windows-1252 byte is emitted as a CSS hex escape.
+     */
+    public function testWindows1252RoundTripThroughCssContext(): void
+    {
+        // 0xE9 == "é". In the CSS context the matcher emits "\E9 ".
+        $escaped = (new Escaper('windows-1252'))->escCss("\xE9");
+
+        self::assertSame('\\E9 ', $escaped);
+    }
+
+    /**
+     * A byte that cannot be converted from the source encoding to UTF-8 must
+     * surface as EncodingConversionException.
+     */
+    public function testForwardConversionFailureRaisesException(): void
+    {
+        // The failure exercised below comes from iconv rejecting the byte.
+        // NOTE(review): ext-iconv is only a Composer "suggest"; without it the
+        // conversion presumably goes through mbstring, which may not fail on
+        // this input — confirm.
+        if (!\extension_loaded('iconv')) {
+            self::markTestSkipped('ext-iconv is required to trigger the conversion failure.');
+        }
+
+        $this->expectException(EncodingConversionException::class);
+        $this->expectExceptionMessage('Failed to convert string from "windows-1252" to "UTF-8".');
+
+        // 0x81 is an undefined byte in windows-1252 — iconv returns false.
+        (new Escaper('windows-1252'))->escHtmlAttr("\x81");
+    }
+}
diff --git a/tests/EscaperHtmlAttrTest.php b/tests/EscaperHtmlAttrTest.php
new file mode 100644
index 0000000..721af03
--- /dev/null
+++ b/tests/EscaperHtmlAttrTest.php
@@ -0,0 +1,99 @@
+escaper = new Escaper();
+ }
+
+    /**
+     * Escaping an empty attribute value yields an empty string.
+     */
+    public function testEmptyStringShortCircuits(): void
+    {
+        $escaped = $this->escaper->escHtmlAttr('');
+
+        self::assertSame('', $escaped);
+    }
+
+    /**
+     * A value made only of digits is returned unchanged.
+     */
+    public function testDigitsOnlyShortCircuits(): void
+    {
+        $digits = '12345';
+
+        self::assertSame($digits, $this->escaper->escHtmlAttr($digits));
+    }
+
+    /**
+     * Letters, digits, comma, period, hyphen and underscore are returned
+     * unchanged by the attribute escaper.
+     */
+    public function testWhitelistCharactersPassThrough(): void
+    {
+        $safe = 'abc,XYZ.-_0123';
+        $escaped = $this->escaper->escHtmlAttr($safe);
+
+        self::assertSame('abc,XYZ.-_0123', $escaped);
+    }
+
+    /**
+     * An unquoted-attribute injection payload must have every character
+     * outside the whitelist (space, "=", "(", "/", "!", ")", ";") replaced
+     * by a hexadecimal numeric entity.
+     */
+    public function testQuotelessAttributeInjectionVector(): void
+    {
+        $input = 'faketitle onmouseover=alert(/InitPHP!/);';
+
+        // The expectation must be the escaped form; an expectation equal to
+        // the raw input would assert that no escaping happens.
+        self::assertSame(
+            'faketitle&#x20;onmouseover&#x3D;alert&#x28;&#x2F;InitPHP&#x21;&#x2F;&#x29;&#x3B;',
+            $this->escaper->escHtmlAttr($input)
+        );
+    }
+
+    /**
+     * The four characters that have XML-safe named entities must be emitted
+     * with those names rather than as numeric references.
+     */
+    public function testNamedEntitiesPreferredOverNumericForms(): void
+    {
+        self::assertSame('&quot;', $this->escaper->escHtmlAttr('"'));
+        self::assertSame('&amp;', $this->escaper->escHtmlAttr('&'));
+        self::assertSame('&lt;', $this->escaper->escHtmlAttr('<'));
+        self::assertSame('&gt;', $this->escaper->escHtmlAttr('>'));
+    }
+
+    /**
+     * C0 control characters must be replaced by the entity for U+FFFD
+     * (REPLACEMENT CHARACTER) instead of being passed through.
+     */
+    public function testControlCharactersBecomeReplacementCharacter(): void
+    {
+        // 0x00, 0x01, 0x1B all fall into the C0 range and must not survive.
+        // The expected value is the entity text, not a literal U+FFFD.
+        self::assertSame('&#xFFFD;', $this->escaper->escHtmlAttr("\x00"));
+        self::assertSame('&#xFFFD;', $this->escaper->escHtmlAttr("\x01"));
+        self::assertSame('&#xFFFD;', $this->escaper->escHtmlAttr("\x1B"));
+    }
+
+    /**
+     * Tab, line feed and carriage return must be emitted as their own
+     * two-digit hexadecimal entities rather than as U+FFFD.
+     */
+    public function testTabLineFeedAndCarriageReturnAreEscapedNotReplaced(): void
+    {
+        // Tab/LF/CR are explicitly exempted from the replacement rule.
+        self::assertSame('&#x09;', $this->escaper->escHtmlAttr("\t"));
+        self::assertSame('&#x0A;', $this->escaper->escHtmlAttr("\n"));
+        self::assertSame('&#x0D;', $this->escaper->escHtmlAttr("\r"));
+    }
+
+    /**
+     * DEL and the C1 control characters must be replaced by the entity for
+     * U+FFFD (REPLACEMENT CHARACTER).
+     */
+    public function testC1ControlsBecomeReplacementCharacter(): void
+    {
+        // U+007F DEL (single-byte UTF-8). Strictly this sits just below the
+        // C1 block (U+0080–U+009F) but is covered by the same expectation.
+        self::assertSame('&#xFFFD;', $this->escaper->escHtmlAttr("\x7F"));
+        // U+0080 PADDING CHARACTER (multibyte UTF-8: 0xC2 0x80).
+        self::assertSame('&#xFFFD;', $this->escaper->escHtmlAttr("\xC2\x80"));
+        // U+009F APPLICATION PROGRAM COMMAND (multibyte UTF-8: 0xC2 0x9F).
+        self::assertSame('&#xFFFD;', $this->escaper->escHtmlAttr("\xC2\x9F"));
+    }
+
+    /**
+     * U+00A0 must be emitted as its own numeric entity, not as U+FFFD.
+     */
+    public function testU00A0IsEscapedNotReplaced(): void
+    {
+        // U+00A0 NO-BREAK SPACE sits just outside the C1 range and must be
+        // escaped as a normal character, not replaced.
+        self::assertSame('&#xA0;', $this->escaper->escHtmlAttr("\xC2\xA0"));
+    }
+
+    /**
+     * A BMP character above U+00FF must be emitted as a numeric entity
+     * padded to four hexadecimal digits.
+     */
+    public function testBmpMultibyteCharacterUsesFourDigitHex(): void
+    {
+        // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş)
+        self::assertSame('&#x015F;', $this->escaper->escHtmlAttr('ş'));
+    }
+
+    /**
+     * A character beyond the BMP must be emitted as a numeric entity that
+     * carries the full code point.
+     */
+    public function testSupplementaryPlaneCharacterEmitsFullHex(): void
+    {
+        // U+1F680 ROCKET — beyond the BMP.
+        self::assertSame('&#x1F680;', $this->escaper->escHtmlAttr('🚀'));
+    }
+}
diff --git a/tests/EscaperHtmlTest.php b/tests/EscaperHtmlTest.php
new file mode 100644
index 0000000..966260d
--- /dev/null
+++ b/tests/EscaperHtmlTest.php
@@ -0,0 +1,65 @@
+escaper = new Escaper();
+ }
+
+    /**
+     * A script-tag payload must have its angle brackets and double quotes
+     * converted to named entities by escHtml().
+     */
+    public function testEscapesAngleBracketsAndQuotes(): void
+    {
+        $input = '<script>alert("xss")</script>';
+        $output = $this->escaper->escHtml($input);
+
+        // The expectation is the entity-encoded text; no raw "<", ">" or '"'
+        // may remain in the output.
+        self::assertSame(
+            '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;',
+            $output
+        );
+    }
+
+    /**
+     * A single quote must be entity-encoded by escHtml().
+     */
+    public function testEscapesSingleQuoteWithEntQuotes(): void
+    {
+        // htmlspecialchars() with ENT_QUOTES and the default HTML 4.01
+        // document type emits &#039; for "'".
+        // NOTE(review): if the escaper passes ENT_HTML5 the expected value
+        // is &apos; instead — confirm against the implementation.
+        self::assertSame('&#039;', $this->escaper->escHtml("'"));
+    }
+
+    /**
+     * A bare ampersand in body text must be emitted as the named entity.
+     */
+    public function testEscapesAmpersand(): void
+    {
+        self::assertSame('Tom &amp; Jerry', $this->escaper->escHtml('Tom & Jerry'));
+    }
+
+    /**
+     * Escaping an empty string for HTML yields an empty string.
+     */
+    public function testEmptyStringReturnsEmptyString(): void
+    {
+        $escaped = $this->escaper->escHtml('');
+
+        self::assertSame('', $escaped);
+    }
+
+    /**
+     * ASCII text without HTML-special characters is returned unchanged.
+     */
+    public function testPlainAsciiPassesThroughUnchanged(): void
+    {
+        $plain = 'Hello, world!';
+
+        self::assertSame($plain, $this->escaper->escHtml($plain));
+    }
+
+    /**
+     * Multibyte UTF-8 text without HTML-special characters is returned
+     * unchanged.
+     */
+    public function testMultibyteCharactersPassThroughInUtf8(): void
+    {
+        // None of &, <, >, ", ' occur in this text, so nothing is rewritten.
+        $text = 'Merhaba dünya — şŞıİğĞ';
+        $escaped = $this->escaper->escHtml($text);
+
+        self::assertSame($text, $escaped);
+    }
+
+    /**
+     * Malformed UTF-8 passed to escHtml() must not collapse the whole result
+     * to an empty string.
+     */
+    public function testInvalidByteSequenceIsReplacedNotDropped(): void
+    {
+        // 0xC3 0x28 is an invalid two-byte sequence. With ENT_SUBSTITUTE the
+        // malformed part is replaced by U+FFFD rather than the whole string
+        // being discarded.
+        $malformed = "\xC3\x28";
+
+        self::assertNotSame('', $this->escaper->escHtml($malformed));
+    }
+}
diff --git a/tests/EscaperJsTest.php b/tests/EscaperJsTest.php
new file mode 100644
index 0000000..42a9c22
--- /dev/null
+++ b/tests/EscaperJsTest.php
@@ -0,0 +1,63 @@
+escaper = new Escaper();
+ }
+
+    /**
+     * Escaping an empty string for JavaScript yields an empty string.
+     */
+    public function testEmptyStringShortCircuits(): void
+    {
+        $escaped = $this->escaper->escJs('');
+
+        self::assertSame('', $escaped);
+    }
+
+    /**
+     * A value made only of digits is returned unchanged.
+     */
+    public function testDigitsOnlyShortCircuits(): void
+    {
+        $digits = '98765';
+
+        self::assertSame($digits, $this->escaper->escJs($digits));
+    }
+
+    /**
+     * Letters, digits, comma, period and underscore are returned unchanged
+     * by the JavaScript escaper.
+     */
+    public function testWhitelistCharactersPassThrough(): void
+    {
+        $safe = 'abc,XYZ._0';
+        $escaped = $this->escaper->escJs($safe);
+
+        self::assertSame('abc,XYZ._0', $escaped);
+    }
+
+    /**
+     * A payload that smuggles quotes in as HTML entities must have the "&"
+     * and ";" of each entity hex-escaped, so the entity cannot be decoded
+     * back into a quote inside a script context.
+     */
+    public function testEntityBasedInjectionVectorIsEscaped(): void
+    {
+        // The input contains the literal text &quot; (six characters), which
+        // is what the \x26quot\x3B sequences in the expectation correspond to.
+        $input = 'bar&quot;; alert(&quot;Hello!&quot;); var xss=&quot;true';
+
+        self::assertSame(
+            'bar\\x26quot\\x3B\\x3B\\x20alert\\x28\\x26quot\\x3BHello\\x21\\x26quot\\x3B\\x29\\x3B\\x20var\\x20xss\\x3D\\x26quot\\x3Btrue',
+            $this->escaper->escJs($input)
+        );
+    }
+
+    /**
+     * Single-byte characters outside the whitelist are emitted as two-digit
+     * \xHH escapes.
+     */
+    public function testSingleByteSpecialCharsBecomeHexEscapes(): void
+    {
+        // Raw character => expected escape, checked in this order.
+        $cases = [
+            ' ' => '\\x20',
+            '"' => '\\x22',
+            '/' => '\\x2F',
+            '<' => '\\x3C',
+        ];
+
+        foreach ($cases as $raw => $expected) {
+            self::assertSame($expected, $this->escaper->escJs($raw));
+        }
+    }
+
+    /**
+     * A multibyte BMP character is emitted as a four-digit \uHHHH escape.
+     */
+    public function testBmpMultibyteCharacterBecomesUnicodeEscape(): void
+    {
+        // "ş" is U+015F LATIN SMALL LETTER S WITH CEDILLA.
+        $escaped = $this->escaper->escJs('ş');
+
+        self::assertSame('\\u015F', $escaped);
+    }
+
+    /**
+     * A character beyond the BMP is emitted as a UTF-16 surrogate pair of
+     * \uHHHH escapes.
+     */
+    public function testSupplementaryPlaneCharacterBecomesSurrogatePair(): void
+    {
+        // U+1F680 splits into high surrogate D83D and low surrogate DE80.
+        $escaped = $this->escaper->escJs('🚀');
+
+        self::assertSame('\\uD83D\\uDE80', $escaped);
+    }
+}
diff --git a/tests/EscaperUrlTest.php b/tests/EscaperUrlTest.php
new file mode 100644
index 0000000..043fd51
--- /dev/null
+++ b/tests/EscaperUrlTest.php
@@ -0,0 +1,45 @@
+escaper = new Escaper();
+ }
+
+    /**
+     * Escaping an empty string for a URL yields an empty string.
+     */
+    public function testEmptyStringReturnsEmptyString(): void
+    {
+        $escaped = $this->escaper->escUrl('');
+
+        self::assertSame('', $escaped);
+    }
+
+    /**
+     * Letters, digits, ".", "-", "_" and "~" are returned unchanged by the
+     * URL escaper.
+     */
+    public function testRfc3986UnreservedCharactersAreNotEncoded(): void
+    {
+        $unreserved = 'Hello.world-1_2~3';
+
+        self::assertSame($unreserved, $this->escaper->escUrl($unreserved));
+    }
+
+    /**
+     * A space is encoded as "%20", not as "+".
+     */
+    public function testSpaceIsPercentEncodedAsPercent20(): void
+    {
+        // RFC 3986 style (rawurlencode), unlike urlencode which emits "+".
+        $escaped = $this->escaper->escUrl('foo bar');
+
+        self::assertSame('foo%20bar', $escaped);
+    }
+
+    /**
+     * An attribute-breaking payload has its quotes, spaces, "=", and
+     * parentheses percent-encoded by the URL escaper.
+     */
+    public function testJavascriptInjectionVectorIsPercentEncoded(): void
+    {
+        $expected = '%22%20onmouseover%3D%22alert%28%27hello%27%29';
+
+        self::assertSame(
+            $expected,
+            $this->escaper->escUrl('" onmouseover="alert(\'hello\')')
+        );
+    }
+}
diff --git a/tests/ExceptionHierarchyTest.php b/tests/ExceptionHierarchyTest.php
new file mode 100644
index 0000000..dac7936
--- /dev/null
+++ b/tests/ExceptionHierarchyTest.php
@@ -0,0 +1,60 @@
+getParentClass();
+ self::assertNotFalse($parent, \sprintf('Class "%s" must have a parent.', $class));
+
+ return $parent->getName();
+ }
+}