78643a89 |
* Class representing a restaurant: its name, the URL of its menu page and the XPath expressions locating the menu sections.
*/
class Restaurant
{
    /** @var string Address the menu page is read from. */
    private $url;

    /** @var mixed Display name; stored as given, it is not validated. */
    private $name;

    /** @var array XPath expressions; array keys are kept exactly as passed in. */
    private $xpaths = array();

    /**
     * Validates and stores the restaurant definition.
     *
     * @param mixed        $name   Display name (may be empty; only stored).
     * @param string       $url    Address of the menu page; must not be empty.
     * @param string|array $xpaths A single XPath expression, or an array of
     *                             expressions (keys are preserved). A single
     *                             string is wrapped into a one-element array.
     *
     * @throws InvalidArgumentException When $url or $xpaths is empty, when
     *                                  $xpaths is neither a string nor an array,
     *                                  or when an array element is not a string.
     */
    public function __construct($name, $url, $xpaths)
    {
        if (empty($url) || empty($xpaths)) {
            // Only these two are mandatory; the name has never been checked.
            throw new InvalidArgumentException("Arguments url and xpaths are required");
        }

        if (is_string($xpaths)) {
            $xpaths = array($xpaths);
        } elseif (!is_array($xpaths)) {
            throw new InvalidArgumentException("Xpaths must be string or array");
        }

        // Fail here rather than later inside DOMXPath::query(), where a
        // non-string expression would surface far from its origin.
        foreach ($xpaths as $xpath) {
            if (!is_string($xpath)) {
                throw new InvalidArgumentException("Xpaths must be string or array of strings");
            }
        }

        $this->name = $name;
        $this->url = $url;
        $this->xpaths = $xpaths;
    }

    /**
     * @return mixed The display name passed to the constructor.
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @return string The menu page address passed to the constructor.
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * @return array The XPath expressions, always as an array.
     */
    public function getXpaths()
    {
        return $this->xpaths;
    }
}
/**
 * Downloads each restaurant's page, evaluates its XPath expressions and
 * echoes the matched menu lines as a small HTML fragment.
 *
 * The restaurant objects are used only through getUrl(), getName() and
 * getXpaths().
 */
class Parser
{
    /** @var array Restaurant objects to process, in output order. */
    private $restaurants = array();

    /**
     * @param array $restaurants Non-empty array of restaurant objects.
     *
     * @throws InvalidArgumentException When the argument is empty or not an array.
     */
    public function __construct($restaurants)
    {
        if (empty($restaurants) || !is_array($restaurants)) {
            throw new InvalidArgumentException("Array argument required");
        }
        $this->restaurants = $restaurants;
    }

    /**
     * Fetches and prints the menu of every configured restaurant.
     *
     * For each restaurant a linked heading is printed, then for every XPath
     * that matches, the label (the array key) in bold followed by the
     * non-blank text of each child node of the FIRST matched node. A matched
     * node without any text prints "{}". XPaths that match nothing (or are
     * invalid) are skipped silently.
     *
     * All dynamic values are HTML-escaped: the text comes from third-party
     * pages and must not be able to inject markup into the output.
     *
     * @return void
     *
     * @throws RuntimeException When a restaurant's URL cannot be read.
     */
    public function parse()
    {
        foreach ($this->restaurants as $restaurant) {
            $url = $restaurant->getUrl();
            $source = file_get_contents($url);
            if ($source === false) {
                throw new RuntimeException("Can't read source address: " . $url);
            }

            $xpath_obj = new DOMXPath($this->loadDocument($source));

            echo "Restaurant: <a href='" . $this->escape($url) . "'>"
                . $this->escape($restaurant->getName()) . "</a><br>\n";

            foreach ($restaurant->getXpaths() as $name => $xpath) {
                $found = $xpath_obj->query($xpath);
                if ($found === false || $found->length === 0) {
                    continue;
                }

                echo "<b>" . $this->escape($name) . "</b>";

                $exists = false;
                foreach ($found->item(0)->childNodes as $elem) {
                    $meal = trim((string) $elem->nodeValue);
                    // Compare against '' explicitly: empty() would also
                    // discard a line whose whole text is "0".
                    if ($meal !== '') {
                        echo "\t<br>" . $this->escape($meal) . "\n";
                        $exists = true;
                    }
                }
                if (!$exists) {
                    echo "<br>{}";
                }
                echo "<br>";
            }
            echo "<br>";
        }
    }

    /**
     * Parses an HTML string into a DOM document, tolerating malformed markup.
     *
     * @param string $source Raw HTML; may be empty.
     *
     * @return DOMDocument The parsed document (empty when $source is empty).
     */
    private function loadDocument($source)
    {
        $dom = new DOMDocument();
        if ($source === '') {
            // loadHTML('') throws ValueError on PHP 8; an empty document
            // simply yields no XPath matches.
            return $dom;
        }

        // Real-world pages are rarely valid HTML. Collect libxml's complaints
        // internally instead of silencing everything with "@", then restore
        // the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($source);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $dom;
    }

    /**
     * Escapes a value for use in HTML text or a quoted attribute.
     *
     * @param mixed $value Value to print; cast to string first.
     *
     * @return string
     */
    private function escape($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    }
}
// Restaurant definitions: display name, menu page URL, and a map of
// section label => XPath expression locating that section on the page.
$twenties = new Restaurant(
    "Twenties",
    "http://www.twenties.sk/",
    array(
        "Polievky" => '//*[@id="article"]/div[2]/p[1]'
    )
);

$pulitzer = new Restaurant(
    "Pulitzer",
    "http://www.pulitzer.sk",
    array(
        "Polievky" => '//*[@id="soups"]',
        "Hlavne jedla" => '//*[@id="meals"]'
    )
);

$slovak = new Restaurant(
    "Slovak pub",
    "http://www.arcaderestaurant.sk/articles/public_menu/show-modules/id/24",
    array(
        "Salat" => '//*[@id="table2"]/tbody/tr[10]',
        "Polievka" => '//*[@id="table2"]/tbody/tr[2]'
    )
);

// Fetch every page and print the menus, in the order listed here.
$parser = new Parser(array($twenties, $pulitzer, $slovak));
$parser->parse();
/** |
18499246 |
// Restaurant name => URL of the page downloaded for that restaurant.
$sites = [
'Pulitzer' => 'http://pulitzer.sk/',
'Twenties' => 'http://www.twenties.sk/'
];
// Restaurant name (same keys as $sites) => [section label => XPath expression].
// The loop below reads $xpaths[$restaurant] for each key of $sites, so every
// site needs an entry here.
$xpaths = [
'Pulitzer' => [
'Polievky' => '//*[@id="soups"]',
'Hlavne zradla' => '//*[@id="meals"]'
],
'Twenties' => [
'Polievka + hlavne zradla' => '//*[@id="article"]/div[2]/p[1]'
]
];
// Path of the results file. NOTE(review): presumably append_result() writes
// to this file; that function is defined outside this view - confirm.
$output_file = '/tmp/results.php';
// Writing an empty string creates the file, or empties it if it already
// exists, so each run starts from a blank file.
file_put_contents($output_file, '');
// One DOMDocument instance, reused by the loop below for every site.
$dom = new DOMDocument();
foreach ($sites as $restaurant => $site)
{
@$dom->loadHTML(file_get_contents($site));
$xpath_obj = new DOMXPath($dom);
append_result('V tovarni na jedlo s nazvom *' . $restaurant . "* maju dneska toto: \n");
foreach ($xpaths[$restaurant] as $human_meal => $xpath)
{
append_result("\t$human_meal:\n");
$oh_my_god_here_are_saved_meals = $xpath_obj->query($xpath);
if ($oh_my_god_here_are_saved_meals === FALSE
OR $oh_my_god_here_are_saved_meals->length === 0)
{
continue;
}
$found_meal = FALSE; |