Skip to content

Commit

Permalink
Tokenizer/PHP: add tests for heredoc/nowdoc tokenization
Browse files Browse the repository at this point in the history
The PHP tokenizer contains logic to:
* Retokenize the start/end tokens for nowdocs from `T_(START|END)_HEREDOC` to `T_(START|END)_NOWDOC`;
* Retokenize the _contents_ of a heredoc/nowdoc to `T_HEREDOC`/`T_NOWDOC` tokens;
* Retokenize the start token from `T_START_(HERE|NOW)DOC` to `T_STRING` if the heredoc/nowdoc is unclosed;
* Ensure that each line in the contents has its own token.

This commit adds tests safeguarding and documenting this part of the tokenizer.
  • Loading branch information
jrfnl committed Jul 31, 2024
1 parent a3d11a9 commit 3e93f2e
Show file tree
Hide file tree
Showing 4 changed files with 304 additions and 0 deletions.
39 changes: 39 additions & 0 deletions tests/Core/Tokenizer/PHP/HeredocNowdocTest.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

// Fixture for HeredocNowdocTest. Each case is located via the marker comment directly above it.
// The heredoc/nowdoc bodies are compared verbatim (one token per line, including the new line char),
// so do not re-indent, trim or otherwise reformat them.

/* testHeredocSingleLine */
echo <<<EOD
Some $var text
EOD;

/* testNowdocSingleLine */
echo <<<'MARKER'
Some text
MARKER;

/* testHeredocMultiLine */
echo <<<"😬"
Lorum ipsum
Some $var text
dolor sit amet
😬;

/* testNowdocMultiLine */
echo <<<'multi_line'
Lorum ipsum
Some text
dolor sit amet
multi_line;

/* testHeredocEndsOnBlankLine */
echo <<<EOD
Lorum ipsum
dolor sit amet

EOD;

/* testNowdocEndsOnBlankLine */
echo <<<'EOD'
Lorum ipsum
dolor sit amet

EOD;
213 changes: 213 additions & 0 deletions tests/Core/Tokenizer/PHP/HeredocNowdocTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
<?php
/**
* Tests the tokenization for heredoc/nowdoc constructs.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;

use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
use PHP_CodeSniffer\Util\Tokens;

/**
 * Tests the tokenization for heredoc/nowdoc constructs.
 *
 * Verifies that:
 * - Nowdoc opener/closers are retokenized from `T_[START_|END_]HEREDOC` to `T_[START_|END_]NOWDOC`.
 * - The contents of the heredoc/nowdoc is tokenized as `T_HEREDOC`/`T_NOWDOC`.
 * - Each line of the contents has its own token, which includes the new line char.
 *
 * Every test locates its fixture code via a marker comment named after the test method.
 *
 * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize
 */
final class HeredocNowdocTest extends AbstractTokenizerTestCase
{


    /**
     * Verify tokenization of a single-line heredoc construct.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testHeredocSingleLine()
    {
        $expectedSequence = [
            [T_START_HEREDOC => '<<<EOD'."\n"],
            [T_HEREDOC       => 'Some $var text'."\n"],
            [T_END_HEREDOC   => 'EOD'],
        ];

        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC);

        $this->checkTokenSequence($target, $expectedSequence);

    }//end testHeredocSingleLine()


    /**
     * Verify tokenization of a single-line nowdoc construct.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testNowdocSingleLine()
    {
        $expectedSequence = [
            [T_START_NOWDOC => "<<<'MARKER'\n"],
            [T_NOWDOC       => 'Some text'."\n"],
            [T_END_NOWDOC   => 'MARKER'],
        ];

        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC);

        $this->checkTokenSequence($target, $expectedSequence);

    }//end testNowdocSingleLine()


    /**
     * Verify tokenization of a multi-line heredoc construct.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testHeredocMultiLine()
    {
        $expectedSequence = [
            [T_START_HEREDOC => '<<<"😬"'."\n"],
            [T_HEREDOC       => 'Lorum ipsum'."\n"],
            [T_HEREDOC       => 'Some $var text'."\n"],
            [T_HEREDOC       => 'dolor sit amet'."\n"],
            [T_END_HEREDOC   => '😬'],
        ];

        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC);

        $this->checkTokenSequence($target, $expectedSequence);

    }//end testHeredocMultiLine()


    /**
     * Verify tokenization of a multi-line nowdoc construct.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testNowdocMultiLine()
    {
        $expectedSequence = [
            [T_START_NOWDOC => "<<<'multi_line'\n"],
            [T_NOWDOC       => 'Lorum ipsum'."\n"],
            [T_NOWDOC       => 'Some text'."\n"],
            [T_NOWDOC       => 'dolor sit amet'."\n"],
            [T_END_NOWDOC   => 'multi_line'],
        ];

        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC);

        $this->checkTokenSequence($target, $expectedSequence);

    }//end testNowdocMultiLine()


    /**
     * Verify tokenization of a multi-line heredoc construct of which the last content line is blank.
     *
     * The blank line is expected to get its own `T_HEREDOC` token containing only the new line char.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testHeredocEndsOnBlankLine()
    {
        $expectedSequence = [
            [T_START_HEREDOC => '<<<EOD'."\n"],
            [T_HEREDOC       => 'Lorum ipsum'."\n"],
            [T_HEREDOC       => 'dolor sit amet'."\n"],
            [T_HEREDOC       => "\n"],
            [T_END_HEREDOC   => 'EOD'],
        ];

        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC);

        $this->checkTokenSequence($target, $expectedSequence);

    }//end testHeredocEndsOnBlankLine()


    /**
     * Verify tokenization of a multi-line nowdoc construct of which the last content line is blank.
     *
     * The blank line is expected to get its own `T_NOWDOC` token containing only the new line char.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testNowdocEndsOnBlankLine()
    {
        $expectedSequence = [
            [T_START_NOWDOC => "<<<'EOD'\n"],
            [T_NOWDOC       => 'Lorum ipsum'."\n"],
            [T_NOWDOC       => 'dolor sit amet'."\n"],
            [T_NOWDOC       => "\n"],
            [T_END_NOWDOC   => 'EOD'],
        ];

        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC);

        $this->checkTokenSequence($target, $expectedSequence);

    }//end testNowdocEndsOnBlankLine()


    /**
     * Test helper. Check a token sequence complies with an expected token sequence.
     *
     * Starting at `$startPtr`, every consecutive token is compared against the next item
     * in the expected sequence on token code, token type and token content.
     *
     * @param int                              $startPtr         The position in the file to start checking from.
     * @param array<array<int|string, string>> $expectedSequence The consecutive token constants and their contents to expect.
     *                                                           Each item is a single-entry array: token constant => content.
     *
     * @return void
     */
    private function checkTokenSequence($startPtr, array $expectedSequence)
    {
        $tokens   = $this->phpcsFile->getTokens();
        $stackPtr = $startPtr;

        foreach ($expectedSequence as $sequenceKey => $currentItem) {
            // Each item holds exactly one "token constant => content" pair.
            $expectedContent = reset($currentItem);
            $expectedCode    = key($currentItem);
            $expectedType    = Tokens::tokenName($expectedCode);
            $errorMsgSuffix  = PHP_EOL.'(StackPtr: '.$stackPtr.' | Position in sequence: '.$sequenceKey.' | Expected: '.$expectedType.')';

            // Fail with a readable message instead of an undefined offset error
            // when the expected sequence is longer than the remaining token stream.
            $this->assertArrayHasKey(
                $stackPtr,
                $tokens,
                'Token stream ended before the expected sequence was complete'.$errorMsgSuffix
            );

            $this->assertSame(
                $expectedCode,
                $tokens[$stackPtr]['code'],
                'Token tokenized as '.Tokens::tokenName($tokens[$stackPtr]['code']).', not '.$expectedType.' (code)'.$errorMsgSuffix
            );

            $this->assertSame(
                $expectedType,
                $tokens[$stackPtr]['type'],
                'Token tokenized as '.$tokens[$stackPtr]['type'].', not '.$expectedType.' (type)'.$errorMsgSuffix
            );

            $this->assertSame(
                $expectedContent,
                $tokens[$stackPtr]['content'],
                'Token content did not match expectations'.$errorMsgSuffix
            );

            ++$stackPtr;
        }//end foreach

    }//end checkTokenSequence()


}//end class
11 changes: 11 additions & 0 deletions tests/Core/Tokenizer/PHP/HeredocParseErrorTest.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

// This is an intentional parse error. This test should be the only test in the file!
// NOTE: this is NOT a _real_ merge conflict, but a valid test.
// The "<<< HEAD" tail of the first marker line below is the token targeted by HeredocParseErrorTest:
// it looks like a heredoc opener, but nothing in this file closes it.

/* testUnclosedHeredoc */
<<<<<<< HEAD
$a = 10;
=======
$a = 20;
>>>>>>> master
41 changes: 41 additions & 0 deletions tests/Core/Tokenizer/PHP/HeredocParseErrorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php
/**
* Tests the tokenization for an unclosed heredoc construct.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;

use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;

/**
 * Tests the tokenization for an unclosed heredoc construct.
 *
 * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize
 */
final class HeredocParseErrorTest extends AbstractTokenizerTestCase
{


    /**
     * Verify that the start token of an unclosed heredoc is retokenized to T_STRING.
     *
     * The fixture contains a merge conflict marker line (`<<<<<<< HEAD`). The token under test
     * is the `<<< HEAD` + new line part of that line, for which no heredoc closer exists.
     *
     * @return void
     */
    public function testMergeConflict()
    {
        $tokens = $this->phpcsFile->getTokens();

        // Search for both token types, so a missing retokenization results in a failed assertion
        // below with a descriptive message, rather than in the target token not being found.
        $token      = $this->getTargetToken('/* testUnclosedHeredoc */', [T_START_HEREDOC, T_STRING], '<<< HEAD'."\n");
        $tokenArray = $tokens[$token];

        $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)');
        $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)');

    }//end testMergeConflict()


}//end class

0 comments on commit 3e93f2e

Please sign in to comment.