| Current Path : /home/purehotels/public_html/administrator/components/com_watchfulli/classes/ |
| Current File : /home/purehotels/public_html/administrator/components/com_watchfulli/classes/robots.php |
<?php
/**
* @version admin/classes/robots.php 2020-05-27 zanardigit
* @package Watchful Client
* @author Watchful
* @authorUrl https://watchful.net
* @copyright Copyright (c) 2012-2023 Watchful
* @license GNU/GPL v3 or later
*/
// Stop immediately unless the _JEXEC constant is defined, which blocks direct
// requests to this file (presumably set by the Joomla bootstrap - confirm).
defined('_JEXEC') or die;
// Likewise require WATCHFULLI_PATH to be defined before this class is loaded
// (presumably defined by the Watchful component's entry point - confirm).
defined('WATCHFULLI_PATH') or die;
/**
* WatchfulliRobots class
*/
class WatchfulliRobots
{
    /**
     * robots.txt sections (groups).
     *
     * Each entry is a stdClass with three array properties:
     *  + agents   user agent names the group applies to
     *  + allow    values of the group's Allow lines
     *  + disallow values of the group's Disallow lines
     *
     * @var array array of objects
     */
    public $sections = [];

    /**
     * Sitemap entries (value of every "Sitemap:" line, in file order)
     *
     * @var array
     */
    public $sitemaps = [];

    /**
     * Host entries (value of every "Host:" line, in file order)
     *
     * @var array
     */
    public $hosts = [];

    /**
     * Crawl delay: the value of the last "Crawl-delay:" line as a string,
     * or null when the file has none.
     *
     * @var mixed
     */
    public $delay = null;

    /**
     * Unknown lines in robots.txt (comments already stripped)
     *
     * @var array
     */
    public $unknown = [];

    /**
     * Parses the content of a robots.txt file into the public properties.
     *
     * Grouping rules:
     *  - consecutive "User-agent" lines open one group;
     *  - "Allow"/"Disallow" lines add rules to the open group;
     *  - a "User-agent" line that follows rules starts a new group;
     *  - "Crawl-delay" lines and comment-only lines do not close the group;
     *  - any other line (blank, Sitemap, Host, unknown) closes the group.
     * Groups without any user agent are discarded.
     *
     * @param string $content raw robots.txt content
     */
    public function __construct($content)
    {
        // Split on CR/LF only: "\R" in non-UTF mode also splits on byte 0x85,
        // which would cut multi-byte UTF-8 characters (e.g. "Å") in half.
        $lines = preg_split('/\r\n|\r|\n/', (string) $content);

        if (false === $lines)
        {
            $lines = [];
        }

        // tmp storage for the group currently being read
        $agents   = [];
        $allow    = [];
        $disallow = [];

        foreach ($lines as $rawLine)
        {
            // strip comments and surrounding whitespace
            $line = trim((string) preg_replace('/\#.*?$/', '', $rawLine));

            // A line holding only a comment must not terminate the group;
            // a truly blank line still does (handled further down).
            if ('' === $line && '' !== trim($rawLine))
            {
                continue;
            }

            // process user agent and allow/disallow FIRST since they go together
            $value = $this->getDirectiveValue($line, 'User-agent');

            if (null !== $value)
            {
                // Rules already collected: this user agent opens a NEW group.
                if (!empty($allow) || !empty($disallow))
                {
                    $this->addSection($agents, $disallow, $allow);
                    $agents   = [];
                    $allow    = [];
                    $disallow = [];
                }

                $agents[] = $value;
                continue;
            }

            $value = $this->getDirectiveValue($line, 'Disallow');

            if (null !== $value)
            {
                $disallow[] = $value;
                continue;
            }

            $value = $this->getDirectiveValue($line, 'Allow');

            if (null !== $value)
            {
                $allow[] = $value;
                continue;
            }

            // Crawl-delay commonly sits inside a group; record it without
            // closing the group so the rules that follow are not lost.
            $value = $this->getDirectiveValue($line, 'Crawl-delay');

            if (null !== $value)
            {
                $this->delay = $value;
                continue;
            }

            // anything else ends the current group: store it and reset tmp
            $this->addSection($agents, $disallow, $allow);
            $agents   = [];
            $allow    = [];
            $disallow = [];

            // skip empties (strict comparison so that a literal "0" line is kept)
            if ('' === $line)
            {
                continue;
            }

            // process sitemap
            $value = $this->getDirectiveValue($line, 'Sitemap');

            if (null !== $value)
            {
                $this->sitemaps[] = $value;
                continue;
            }

            // process host
            $value = $this->getDirectiveValue($line, 'Host');

            if (null !== $value)
            {
                $this->hosts[] = $value;
                continue;
            }

            // by now the line is unknown
            $this->unknown[] = $line;
        }

        // store the group still open at end of input (no trailing newline)
        $this->addSection($agents, $disallow, $allow);
    }

    /**
     * List of user agents found in robots.txt
     *
     * @return array sorted, de-duplicated, zero-indexed list of agent names
     */
    public function getAgents()
    {
        $agents = [];

        foreach ($this->sections as $section)
        {
            $agents = array_merge($agents, $section->agents);
        }

        // De-duplicate first: array_unique() keeps the original keys, and the
        // following sort() re-indexes the result into a proper list.
        $agents = array_unique($agents);
        sort($agents);

        return $agents;
    }

    /**
     * Get robots.txt paths by user agent
     *
     * Merges the rules of every section that lists the agent. The agent name
     * is matched exactly (case-sensitive).
     *
     * @param string $agent
     *
     * @return stdClass object containing the agent data
     * + agent string the agent name
     * + allow array allowed paths
     * + disallow array disallowed paths
     */
    public function getPathsByAgent($agent)
    {
        $results           = new stdClass();
        $results->agent    = $agent;
        $results->allow    = [];
        $results->disallow = [];

        foreach ($this->sections as $section)
        {
            // strict comparison: agent names are strings, avoid numeric juggling
            if (in_array($agent, $section->agents, true))
            {
                $results->allow    = array_merge($results->allow, $section->allow);
                $results->disallow = array_merge($results->disallow, $section->disallow);
            }
        }

        return $results;
    }

    /**
     * Adds a robots.txt section
     *
     * Nothing is stored when $agents is empty.
     *
     * @param array $agents
     * @param array $disallow
     * @param array $allow
     */
    protected function addSection($agents, $disallow, $allow)
    {
        if (empty($agents))
        {
            return;
        }

        $section           = new stdClass;
        $section->agents   = $agents;
        $section->allow    = $allow;
        $section->disallow = $disallow;

        $this->sections[] = $section;
    }

    /**
     * Extracts the value of a "Name: value" directive line.
     *
     * Only the leading "Name:" prefix is removed (case-insensitively), so a
     * value that itself contains the directive name, such as
     * "Host: localhost:8080", is returned intact.
     *
     * @param string $line cleaned line (comment stripped, trimmed)
     * @param string $name directive name without the colon
     *
     * @return string|null trimmed value, or null if the line is not this directive
     */
    protected function getDirectiveValue($line, $name)
    {
        $prefix = $name . ':';

        if (0 !== stripos($line, $prefix))
        {
            return null;
        }

        return trim((string) substr($line, strlen($prefix)));
    }
}