18499246 |
<?php
/**
 * A restaurant whose menu is read from a web page.
 *
 * Holds a display name, the URL of the menu page and the XPath expressions
 * used to locate the menu sections on that page.
 */
class Restaurant {
    /** @var string URL of the page that contains the menu */
    private $url;
    /** @var string Display name of the restaurant */
    private $name;
    /** @var array XPath expressions; a string key, when present, labels the section */
    private $xpaths = array();

    /**
     * @return string Display name given to the constructor
     */
    public function getName() {
        return $this->name;
    }

    /**
     * @return string Menu page URL given to the constructor
     */
    public function getUrl() {
        return $this->url;
    }

    /**
     * @return array XPath expressions; a single string passed to the
     *               constructor is returned as a one-element array
     */
    public function getXpaths() {
        return $this->xpaths;
    }

    /**
     * @param string       $name   Display name; must not be null or an empty string
     * @param string       $url    Menu page URL; must not be empty
     * @param string|array $xpaths One XPath expression, or an array of them
     *                             (optionally keyed by a section label)
     *
     * @throws InvalidArgumentException When an argument is missing or $xpaths
     *                                  is neither a string nor an array
     */
    public function __construct($name, $url, $xpaths) {
        // The name is compared explicitly so that a name such as "0" stays
        // valid; empty() would reject it.
        if ($name === null || $name === '' || empty($url) || empty($xpaths)) {
            throw new InvalidArgumentException("All arguments are required");
        }

        // Normalise and validate before touching any field, so a failed
        // construction never leaves a half-initialised object behind.
        if (is_string($xpaths)) {
            $xpaths = array($xpaths);
        } elseif (!is_array($xpaths)) {
            throw new InvalidArgumentException("Xpaths must be string or array");
        }

        $this->name = $name;
        $this->url = $url;
        $this->xpaths = $xpaths;
    }
}
|
25728d97 |
/**
 * Contract for objects that receive log messages and can present them.
 *
 * Parser requires its logger to implement this interface.
 */
interface iOutput {
/**
 * Hands one message to the output.
 *
 * @param mixed $message Message to record; the implementations in this file
 *                       treat it as a string
 */
public function log($message);
/**
 * Presents whatever the implementation has received through log().
 */
public function display();
}
/**
 * Base output that accumulates logged messages in memory.
 *
 * display() does nothing at this level; HtmlOutput overrides it to emit the
 * accumulated text.
 */
class Output implements iOutput {
    /** @var string All logged messages, concatenated in call order */
    protected $output = "";

    /**
     * Appends a message to the in-memory buffer.
     *
     * @param string $message Text to append
     */
    public function log($message) {
        $this->output = $this->output . $message;
    }

    /**
     * No-op in the base class.
     */
    public function display() {
    }
}
/**
 * Output that writes the accumulated messages straight to the response.
 */
class HtmlOutput extends Output {
    /**
     * Prints the whole buffer as-is; the buffer is not cleared afterwards.
     */
    public function display() {
        print $this->output;
    }
}
/**
 * Output meant to deliver the accumulated messages by e-mail.
 *
 * Extends Output so that it inherits log() and the message buffer and
 * therefore satisfies iOutput, which Parser requires of its logger.
 * Sending is not implemented yet: display() is still a placeholder.
 */
class EmailOutput extends Output {
    /**
     * Placeholder for mail delivery; currently does nothing.
     */
    public function display() {
        // Send mail
    }
}
|
78643a89 |
/**
 * Downloads the menu page of each restaurant, extracts the configured menu
 * sections with XPath and writes them to a logger as HTML fragments.
 */
class Parser {
    /** @var array Restaurants to process; each must offer getName(), getUrl() and getXpaths() */
    private $restaurants = array();
    /** @var iOutput Receives the generated HTML fragments */
    private $logger;

    /**
     * @param iOutput $logger      Object that receives the generated output
     * @param array   $restaurants Non-empty array of restaurants
     *
     * @throws InvalidArgumentException When $logger does not implement iOutput
     *                                  or $restaurants is not a non-empty array
     */
    public function __construct($logger, $restaurants) {
        // instanceof works for any value. The former class_implements() check
        // failed on non-class values and accepted a bare class-name string,
        // which cannot be used as a logger afterwards.
        if (!($logger instanceof iOutput)) {
            throw new InvalidArgumentException("Logger class must implement interface iOutput");
        }
        if (empty($restaurants) || !is_array($restaurants)) {
            throw new InvalidArgumentException("Array argument required");
        }

        $this->logger = $logger;
        $this->restaurants = $restaurants;
    }

    /**
     * Fetches and parses every restaurant and logs what was found.
     *
     * For each restaurant a linked heading is logged, followed by one block
     * per XPath expression that matched. Only the first matched node is used;
     * the trimmed text of each of its child nodes is logged as one meal.
     * Expressions that match nothing are skipped silently.
     *
     * All text taken from the restaurant definition or the remote page is
     * HTML-escaped, because the result is meant to be echoed into a page and
     * the scraped content is not under our control.
     *
     * @throws RuntimeException When a menu page cannot be read; processing
     *                          stops at that restaurant
     */
    public function parse() {
        foreach ($this->restaurants as $restaurant) {
            $url = $restaurant->getUrl();
            $source = file_get_contents($url);
            if ($source === false) {
                throw new RuntimeException("Can't read source address: " . $url);
            }

            $xpath_obj = new DOMXPath($this->loadDocument($source));

            $this->logger->log(
                "Restaurant: <a href='" . $this->escape($url) . "'>"
                . $this->escape($restaurant->getName()) . "</a><br>\n"
            );

            foreach ($restaurant->getXpaths() as $name => $xpath) {
                $found = $xpath_obj->query($xpath);
                if ($found === false || $found->length === 0) {
                    continue;
                }

                $this->logger->log("<b>" . $this->escape($name) . "</b>");

                $exists = false;
                foreach ($found->item(0)->childNodes as $elem) {
                    $meal = trim($elem->nodeValue);
                    // Compare with '' rather than empty(): a node whose text
                    // is "0" is still content.
                    if ($meal !== '') {
                        $this->logger->log("\t<br>" . $this->escape($meal) . "\n");
                        $exists = true;
                    }
                }
                if (!$exists) {
                    $this->logger->log("<br>{}");
                }
                $this->logger->log("<br>");
            }
            $this->logger->log("<br>");
        }
    }

    /**
     * Builds a DOM document from raw HTML without emitting parser warnings.
     *
     * @param string $source Raw HTML; an empty string yields an empty document
     *
     * @return DOMDocument
     */
    private function loadDocument($source) {
        $dom = new DOMDocument();
        // loadHTML() rejects an empty string, so leave the document empty and
        // let the XPath queries simply find nothing.
        if ($source === '') {
            return $dom;
        }

        // Real-world pages are rarely valid HTML; collect libxml errors
        // internally instead of silencing everything with "@".
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($source);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $dom;
    }

    /**
     * Escapes a value for use in HTML text and in quoted attribute values.
     *
     * @param mixed $text Value to escape; cast to string first
     *
     * @return string
     */
    private function escape($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
    }
}
// Restaurants to scrape. Each one takes a display name, the URL of its menu
// page and a map of section label => XPath expression for that section.
$twenties = new Restaurant(
    "Twenties",
    "http://www.twenties.sk/",
    array(
        "Polievky" => '//*[@id="article"]/div[2]/p[1]',
    )
);

$pulitzer = new Restaurant(
    "Pulitzer",
    "http://www.pulitzer.sk",
    array(
        "Polievky" => '//*[@id="soups"]',
        "Hlavne jedla" => '//*[@id="meals"]',
    )
);

$slovak = new Restaurant(
    "Slovak pub",
    "http://www.arcaderestaurant.sk/articles/public_menu/show-modules/id/24",
    array(
        "Salat" => '//*[@id="table2"]/tbody/tr[10]',
        "Polievka" => '//*[@id="table2"]/tbody/tr[2]',
    )
);

// Collect everything the parser finds, then print it in one go.
$htmlOutput = new HtmlOutput();
$parser = new Parser(
    $htmlOutput,
    array($twenties, $pulitzer, $slovak)
);
$parser->parse();
$htmlOutput->display();
78643a89 |
/** |
18499246 |
* Because a day without meat is no day at all. Meat is part of a meal, which is
* why I go to eat at the small restaurants nearby.
*/
|
78643a89 |
/*
|
18499246 |
$sites = [
'Pulitzer' => 'http://pulitzer.sk/',
'Twenties' => 'http://www.twenties.sk/'
];
$xpaths = [
'Pulitzer' => [
'Polievky' => '//*[@id="soups"]',
'Hlavne zradla' => '//*[@id="meals"]'
],
'Twenties' => [
'Polievka + hlavne zradla' => '//*[@id="article"]/div[2]/p[1]'
]
];
$output_file = '/tmp/results.php';
file_put_contents($output_file, '');
$dom = new DOMDocument();
foreach ($sites as $restaurant => $site)
{
@$dom->loadHTML(file_get_contents($site));
$xpath_obj = new DOMXPath($dom);
append_result('V tovarni na jedlo s nazvom *' . $restaurant . "* maju dneska toto: \n");
foreach ($xpaths[$restaurant] as $human_meal => $xpath)
{
append_result("\t$human_meal:\n");
$oh_my_god_here_are_saved_meals = $xpath_obj->query($xpath);
if ($oh_my_god_here_are_saved_meals === FALSE
OR $oh_my_god_here_are_saved_meals->length === 0)
{
continue;
}
$found_meal = FALSE; |
25728d97 |
foreach ($oh_my_god_here_are_saved_meals->item(0)->childNodes as $elem) |
18499246 |
{
$meal_meal_meal = trim($elem->nodeValue);
if (! empty($meal_meal_meal))
{
append_result("\t\t$meal_meal_meal\n");
$found_meal = TRUE;
}
}
if (! $found_meal)
{
append_result("\tNevaria!!! Daj mi niekto gulomet, nech ich zabijem!\n");
} |
78643a89 |
else |
18499246 |
{
append_result("\n");
}
}
}
append_result("\n\nMake me better on http://git.cinan.sk/obedparser.git/ :) \n");
function append_result($text)
{
global $output_file;
file_put_contents($output_file, $text, FILE_APPEND | FILE_TEXT);
} |
78643a89 |
*/ |