Hello,
I have a problem when returning a dropped request from a spider middleware: the drop never takes effect. It looks like the handleRequest method is never called either. Can you help me, please?
Code
Spiders/Spider1.php
<?php
namespace App\Spiders;
use App\Spiders\Processors\SaveJobToDatabaseProcessor;
use App\Spiders\SpiderMiddleware\CheckJobAlreadyExistsMiddleware;
use Generator;
use RoachPHP\Downloader\Middleware\RequestDeduplicationMiddleware;
use RoachPHP\Downloader\Middleware\UserAgentMiddleware;
use RoachPHP\Extensions\LoggerExtension;
use RoachPHP\Extensions\StatsCollectorExtension;
use RoachPHP\Http\Response;
use RoachPHP\Spider\BasicSpider;
use RoachPHP\Spider\ParseResult;
/**
 * Spider that visits a single documentation page and emits one item
 * built from the page heading and the first matching paragraph.
 *
 * The `id` and `contract_type` fields of the emitted item are hard-coded
 * placeholder values, not data read from the page.
 */
class Spider1 extends BasicSpider
{
    /** URLs the run starts from. */
    public array $startUrls = [
        'https://roach-php.dev/docs/spiders'
    ];

    /** Downloader middleware: request deduplication plus a fixed User-Agent string. */
    public array $downloaderMiddleware = [
        RequestDeduplicationMiddleware::class,
        [UserAgentMiddleware::class, ['userAgent' => 'Mozilla/5.0 (compatible; RoachPHP/0.1.0)']],
    ];

    /** Spider middleware registered for this spider. */
    public array $spiderMiddleware = [
        CheckJobAlreadyExistsMiddleware::class,
    ];

    /** Processors that every yielded item is passed to. */
    public array $itemProcessors = [
        SaveJobToDatabaseProcessor::class,
    ];

    /** Extensions enabled for the run (logging and run statistics). */
    public array $extensions = [
        LoggerExtension::class,
        StatsCollectorExtension::class,
    ];

    /** Concurrency setting for the run. */
    public int $concurrency = 2;

    /** Delay between requests (presumably in seconds; verify against the Roach docs). */
    public int $requestDelay = 1;

    /**
     * Builds a single job item from the response.
     *
     * The title comes from the first `h1`; the content comes from the first
     * paragraph inside the second child `div` of `main`.
     *
     * @return Generator<ParseResult>
     */
    public function parse(Response $response): Generator
    {
        $heading = $response->filter('h1')->text();

        $paragraph = $response->filter('main > div:nth-child(2) p:first-of-type');
        $body = $paragraph->text();

        $job = [
            'id' => '123456',
            'title' => $heading,
            'content' => $body,
            'contract_type' => 'CDI',
        ];

        yield $this->item($job);
    }
}
Spiders/SpiderMiddleware/CheckJobAlreadyExistsMiddleware.php
<?php
namespace App\Spiders\SpiderMiddleware;
use App\Models\Job;
use RoachPHP\Http\Request;
use RoachPHP\Http\Response;
use RoachPHP\Support\Configurable;
use RoachPHP\Spider\Middleware\RequestMiddlewareInterface;
/**
 * Spider request middleware that unconditionally drops every request
 * it is asked to handle.
 *
 * NOTE(review): this hook is presumably only invoked for requests that the
 * spider yields from its parse callback, not for the initial start-URL
 * requests. A spider that yields only items would then never reach
 * handleRequest(). Confirm against the Roach spider middleware docs; if the
 * goal is to skip already-stored jobs, an item-level hook or a downloader
 * middleware is likely the right place.
 */
class CheckJobAlreadyExistsMiddleware implements RequestMiddlewareInterface
{
    use Configurable;

    /**
     * Marks the given request as dropped and returns the result.
     *
     * @param Request  $request  Request passed in by the framework.
     * @param Response $response Response passed alongside the request; unused here.
     */
    public function handleRequest(Request $request, Response $response): Request
    {
        $reason = 'This never happens';
        $droppedRequest = $request->drop($reason);

        return $droppedRequest;
    }
}
Logs
[2022-10-05 16:19:14] local.INFO: Run starting
[2022-10-05 16:19:14] local.INFO: Dispatching request {"uri":"https://roach-php.dev/docs/spiders"}
[2022-10-05 16:19:15] local.INFO: Item scraped {"id":"123456","title":"Spiders","content":"Basic Concepts","contract_type":"CDI"}
[2022-10-05 16:19:15] local.INFO: Run statistics {"duration":"00:00:00","requests.sent":1,"requests.dropped":0,"items.scraped":1,"items.dropped":0}
[2022-10-05 16:19:15] local.INFO: Run finished
As you can see, "requests.dropped" is 0 instead of the expected 1.
Packages version
laravel/framework v9.34.0
roach-php/core 1.1.1
roach-php/laravel 1.0.0