-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature #200 - data generator without php code prototype
- Loading branch information
Showing
21 changed files
with
890 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace MakinaCorpus\DbToolsBundle\Anonymization\Datasource; | ||
|
||
use MakinaCorpus\DbToolsBundle\Error\DatasourceException; | ||
|
||
/**
 * Registry of datasources indexed by their name.
 */
class Context
{
    /** @var array<string, Datasource> */
    private array $datasources = [];

    /**
     * @param iterable<Datasource> $datasources
     *   Datasources to register; a later datasource replaces an earlier
     *   one registered under the same name.
     *
     * @throws \InvalidArgumentException
     *   When one of the given values is not a Datasource instance.
     */
    public function __construct(iterable $datasources)
    {
        foreach ($datasources as $candidate) {
            if ($candidate instanceof Datasource) {
                $this->datasources[$candidate->getName()] = $candidate;

                continue;
            }

            throw new \InvalidArgumentException(\sprintf("Value is not a '%s' instance.", Datasource::class));
        }
    }

    /**
     * Fetch the datasource registered under the given name.
     *
     * @throws DatasourceException
     *   When no datasource is registered under this name.
     */
    public function getDatasource(string $name): Datasource
    {
        // Only Datasource instances are ever stored, hence isset() is enough.
        if (isset($this->datasources[$name])) {
            return $this->datasources[$name];
        }

        throw new DatasourceException(\sprintf("Datasource '%s' does not exist.", $name));
    }

    /**
     * Tell whether a datasource is registered under the given name.
     */
    public function hasDatasource(string $name): bool
    {
        return isset($this->datasources[$name]);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
<?php | ||
Check warning on line 1 in src/Anonymization/Datasource/Datasource.php GitHub Actions / PHP CS Fixer (8.2)
Check warning on line 1 in src/Anonymization/Datasource/Datasource.php GitHub Actions / PHP CS Fixer (8.2)
|
||
|
||
declare(strict_types=1); | ||
|
||
namespace MakinaCorpus\DbToolsBundle\Anonymization\Datasource; | ||
|
||
use MakinaCorpus\DbToolsBundle\Error\DatasourceException; | ||
|
||
/**
 * There are two usages for the datasource:
 *
 *  - You fill a sample table: in this case, it's best to assume that the
 *    default behavior is to create a sample table which contains the whole
 *    data list. In this case, we need the datasource to be an iterator
 *    which will not consume any memory while reading the file.
 *
 *  - The second use case is when using it as an expression datasource:
 *    then we need to be able to randomly select a line in the datasource,
 *    which means we probably need to load it into memory.
 *
 * With regard to the second use case, the default implementations will
 * always load all data into memory, and we'll see what happens next.
 *
 * If this causes trouble, we might want to implement some kind of random
 * line read algorithm working directly on files, it does not really seem
 * that difficult to implement.
 */
abstract class Datasource implements \Countable
{
    /**
     * @param string $name
     *   Datasource name, also used as prefix of error messages.
     */
    public function __construct(
        private string $name,
    ) {}

    /**
     * Get datasource name.
     */
    public function getName(): string
    {
        return $this->name;
    }

    /**
     * Get a random value from this datasource.
     *
     * @return string|array<string>
     */
    abstract public function random(Context $context): string|array;

    /**
     * Get an iterator over all values.
     *
     * @return iterable<string>|iterable<array<string>>
     */
    abstract public function iterator(Context $context): iterable;

    /**
     * Raise an error, prefixing its message with the datasource name.
     *
     * @param string|\Throwable $error
     *   Either the error message, or an error to wrap: in the latter case
     *   the given error is kept as the previous exception.
     *
     * @throws DatasourceException
     *   Always.
     */
    protected function throwError(string|\Throwable $error): never
    {
        $prefix = \sprintf('Datasource "%s": ', $this->name);

        if ($error instanceof \Throwable) {
            throw new DatasourceException($prefix . $error->getMessage(), 0, $error);
        }

        throw new DatasourceException($prefix . $error);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace MakinaCorpus\DbToolsBundle\Anonymization\Datasource; | ||
|
||
/**
 * Datasource backed by a plain list of strings, given either directly as an
 * array or lazily loaded from a text or JSON file.
 *
 * When expressions are provided, random() picks one of them and executes it
 * instead of returning a raw value from the list.
 */
class EnumDatasource extends Datasource
{
    /** @var array<string>|null Values; null until loaded when reading from a file. */
    private ?array $data = null;
    private ?string $filename = null;
    /** @var Expression[] */
    private array $expressions = [];

    /**
     * @param string $name
     *   Datasource name.
     * @param string|array $source
     *   Either the list of values, or the name of the file to read them from.
     * @param null|array $expressions
     *   Expressions, either as raw strings or as Expression instances.
     */
    public function __construct(string $name, string|array $source, ?array $expressions = [])
    {
        parent::__construct($name);

        if (\is_array($source)) {
            $this->data = $source;
        } else {
            $this->filename = $source;
        }

        // The signature accepts null, treat it the same as an empty list.
        foreach (\array_values($expressions ?? []) as $number => $expression) {
            if (\is_string($expression)) {
                $this->expressions[] = new Expression($expression, $name, $number);
            } elseif ($expression instanceof Expression) {
                $this->expressions[] = $expression;
            } else {
                $this->throwError(\sprintf("expression #%d is not a string nor a '%s' instance.", $number, Expression::class));
            }
        }
    }

    #[\Override]
    public function random(Context $context): string|array
    {
        if ($this->expressions) {
            $expression = $this->expressions[\rand(0, \count($this->expressions) - 1)];
            \assert($expression instanceof Expression);

            return $expression->execute($context);
        }

        return $this->rawRandom();
    }

    #[\Override]
    public function iterator(Context $context): iterable
    {
        // Values may not have been loaded yet when reading from a file.
        $this->initialize();

        return (fn () => yield from $this->data)();
    }

    #[\Override]
    public function count(): int
    {
        $this->initialize();

        return \count($this->data);
    }

    /**
     * Get the item at the given position in the data list, without
     * expression handling.
     *
     * @internal
     *   This is being used in unit tests.
     */
    public function rawAt(int $position = 0): string
    {
        $this->initialize();

        return $this->data[$position] ?? $this->throwError(\sprintf("no value at position #%d.", $position));
    }

    /**
     * Get a random item from the data list, without expression handling.
     *
     * @internal
     *   This is being used in the Expression class.
     * @see Expression
     */
    public function rawRandom(): string
    {
        $this->initialize();

        if (!$this->data) {
            // Otherwise rand(0, -1) would yield an undefined offset.
            $this->throwError("does not contain any value.");
        }

        return $this->data[\rand(0, \count($this->data) - 1)];
    }

    /**
     * Internal values initialization: load values from the file on first
     * access, do nothing when values are already there.
     */
    private function initialize(): void
    {
        if (null !== $this->data) {
            return;
        }

        if (null === $this->filename) {
            $this->throwError("was initialized without data nor filename.");
        }
        if (!\file_exists($this->filename)) {
            $this->throwError(\sprintf("file '%s': does not exist.", $this->filename));
        }

        // Only look at the file name itself, directory names may contain dots.
        $ext = \strtolower(\pathinfo($this->filename, \PATHINFO_EXTENSION));
        if ('' === $ext) {
            $ext = 'txt';
        }

        $source = match ($ext) {
            'js', 'json' => $this->parseJsonFile($this->filename),
            'txt' => $this->parseTextFile($this->filename),
            default => $this->throwError(\sprintf("file '%s': unsupported file format '%s'.", $this->filename, $ext)),
        };

        // Collect into a local variable, so that a failure while parsing
        // does not leave this instance initialized with partial data.
        $data = [];
        foreach ($source as $line => $item) {
            if (!\is_string($item)) {
                $this->throwError(\sprintf("file '%s': line #%s is not a valid value.", $this->filename, $line));
            }
            // Strict comparison: "0" is a valid value, empty() would drop it.
            if ('' === $item) {
                // @todo log error?
                continue;
            }
            $data[] = $item;
        }

        $this->data = $data;
    }

    /**
     * Parse data from a JSON file, which must contain a list of strings.
     *
     * @return iterable<int, string>
     *   Values, keyed by their 1-based position in the file.
     */
    private function parseJsonFile(string $filename): iterable
    {
        $contents = \file_get_contents($filename);

        if (false === $contents) {
            $this->throwError(\sprintf("file '%s': could not open file for reading.", $this->filename));
        }

        $list = \json_decode($contents, true);

        if (!\is_array($list)) {
            $this->throwError(\sprintf("file '%s': does not contain valid JSON.", $this->filename));
        }

        return (function () use ($list) {
            $count = 1;
            foreach ($list as $value) {
                if (!\is_string($value)) {
                    $this->throwError(\sprintf("file '%s': item #%s is not a string.", $this->filename, $count));
                }
                yield $count => $value;
                $count++;
            }
        })();
    }

    /**
     * Parse data from a text file, one value per line.
     *
     * @return iterable<int, string>
     *   Trimmed lines, keyed by their 1-based line number.
     */
    private function parseTextFile(string $filename): iterable
    {
        $handle = \fopen($filename, 'r');

        if (false === $handle) {
            $this->throwError(\sprintf("file '%s': could not open file for reading.", $this->filename));
        }

        return (function () use ($handle) {
            try {
                $count = 1;
                // Compare against false: a last line being "0" is falsy.
                while (false !== ($value = \fgets($handle))) {
                    yield $count => \trim($value);
                    $count++;
                }
            } finally {
                \fclose($handle);
            }
        })();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace MakinaCorpus\DbToolsBundle\Anonymization\Datasource; | ||
|
||
use MakinaCorpus\DbToolsBundle\Anonymization\Datasource\Expression\Parser; | ||
use MakinaCorpus\DbToolsBundle\Anonymization\Datasource\Expression\Token; | ||
|
||
/**
 * A parsed user expression, stored as a list of tokens.
 */
class Expression
{
    /** @var Token[] */
    private array $tokens = [];

    /**
     * Parse the given raw text into tokens.
     *
     * Everything but the raw text is passed along to the parser, for error
     * handling and building helpful error messages for end-users.
     *
     * @param string $raw
     *   User text.
     * @param string $datasource
     *   Datasource in which this expression is found.
     * @param int $number
     *   Expression number in datasource.
     */
    public function __construct(string $raw, string $datasource, int $number)
    {
        $parser = new Parser($raw, $datasource, $number);

        $this->tokens = $parser->parse();
    }

    /**
     * Execute every token in order over the given context, and return the
     * concatenation of their results.
     */
    public function execute(Context $context): ?string
    {
        $parts = [];

        foreach ($this->tokens as $token) {
            \assert($token instanceof Token);
            $parts[] = $token->execute($context);
        }

        return \implode('', $parts);
    }

    /**
     * Get the parsed tokens.
     *
     * @internal
     *   For unit tests.
     */
    public function toArray(): array
    {
        return $this->tokens;
    }
}
Oops, something went wrong.