name;
    }

    /**
     * Returns the page address that was passed to the constructor.
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Returns the XPath expressions collected by the constructor.
     */
    public function getXpaths()
    {
        return $this->xpaths;
    }

    /**
     * Stores the name, the page address and the XPath expression(s).
     *
     * @param mixed        $name   label, stored as given (it is not validated here)
     * @param mixed        $url    page address; must not be empty
     * @param string|array $xpaths one XPath expression, or an array of them; a
     *                             string is appended to the list, an array
     *                             replaces it and keeps its keys
     *
     * @throws InvalidArgumentException when $url or $xpaths is empty, or when
     *                                  $xpaths is neither a string nor an array
     */
    public function __construct($name, $url, $xpaths)
    {
        if (empty($url) || empty($xpaths)) {
            throw new InvalidArgumentException("All argument are required");
        }

        $this->name = $name;
        $this->url = $url;

        if (is_string($xpaths)) {
            $this->xpaths[] = $xpaths;
        } elseif (is_array($xpaths)) {
            $this->xpaths = $xpaths;
        } else {
            throw new InvalidArgumentException("Xpaths must be string or array");
        }
    }
}

/**
 * Downloads the page of every configured restaurant and echoes the text found
 * under each of its XPath expressions.
 */
class Parser
{
    private $restaurants = array();

    /**
     * @param array $restaurants non-empty array of objects providing getName(),
     *                           getUrl() and getXpaths()
     *
     * @throws InvalidArgumentException when $restaurants is empty or not an array
     */
    public function __construct($restaurants)
    {
        if (empty($restaurants) || !is_array($restaurants)) {
            throw new InvalidArgumentException("Array argument required");
        }

        $this->restaurants = $restaurants;
    }

    /**
     * Fetches each restaurant page and prints, per XPath expression, the
     * trimmed text of every non-blank child node of the first matching node.
     *
     * An expression that matches nothing (or is rejected by DOMXPath) is
     * skipped without output. A match without any non-blank child prints "{}".
     *
     * @throws RuntimeException when a page cannot be downloaded
     */
    public function parse()
    {
        // NOTE: several literals below contain a raw line break. Their
        // continuation lines must stay at column 0 so the output is unchanged.
        foreach ($this->restaurants as $restaurant) {
            $source = file_get_contents($restaurant->getUrl());
            if ($source === false) {
                throw new RuntimeException("Can't read source address: " . $restaurant->getUrl());
            }

            $xpath_obj = new DOMXPath($this->loadDocument($source));

            echo("Restaurant: " . $restaurant->getName() . "
\n");

            foreach ($restaurant->getXpaths() as $name => $xpath) {
                $found = $xpath_obj->query($xpath);
                if ($found === false || $found->length === 0) {
                    continue;
                }

                echo $name;

                $exists = false;
                foreach ($found->item(0)->childNodes as $elem) {
                    $meal = trim((string) $elem->nodeValue);
                    // Compare against '' instead of using empty(): empty('0')
                    // is true and would silently drop an entry reading "0".
                    if ($meal !== '') {
                        echo("\t
$meal\n");
                        $exists = true;
                    }
                }

                if (!$exists) {
                    echo "
{}";
                }
                echo "
";
            }
            echo "
";
        }
    }

    /**
     * Builds a DOM document from raw HTML without leaking parser warnings.
     *
     * @param string $source raw page markup
     *
     * @return DOMDocument the parsed document (left empty when $source is '')
     */
    private function loadDocument($source)
    {
        $dom = new DOMDocument();

        // An empty body makes loadHTML() fail outside of libxml (a warning on
        // PHP 5/7, a ValueError on PHP 8), so skip parsing and keep the
        // document empty; every XPath query then simply matches nothing.
        if ($source === '') {
            return $dom;
        }

        // Collect markup errors internally instead of silencing the call with
        // "@", then restore whatever error mode the caller had configured.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($source);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $dom;
    }
}

// Restaurants to scrape: display name, page address and
// "section title" => XPath expression pairs.
$twenties = new Restaurant(
    "Twenties",
    "http://www.twenties.sk/",
    array("Polievky" => '//*[@id="article"]/div[2]/p[1]')
);
$pulitzer = new Restaurant(
    "Pulitzer",
    "http://www.pulitzer.sk",
    array("Polievky" => '//*[@id="soups"]', "Hlavne jedla" => '//*[@id="meals"]')
);
$slovak = new Restaurant(
    "Slovak pub",
    "http://www.arcaderestaurant.sk/articles/public_menu/show-modules/id/24",
    array("Salat" => '//*[@id="table2"]/tbody/tr[10]', "Polievka" => '//*[@id="table2"]/tbody/tr[2]')
);

$parser = new Parser(array($twenties, $pulitzer, $slovak));
$parser->parse();

/**
 * Because a day without meat is not a day. Meat is part of a meal, which is
 * why I go to eat at nearby small restaurants.
 */

/*
 Commented-out procedural variant of the same scraper; it appends its output
 to a file instead of echoing it.

$sites = [
    'Pulitzer' => 'http://pulitzer.sk/',
    'Twenties' => 'http://www.twenties.sk/'
];

$xpaths = [
    'Pulitzer' => [
        'Polievky' => '//*[@id="soups"]',
        'Hlavne zradla' => '//*[@id="meals"]'
    ],
    'Twenties' => [
        'Polievka + hlavne zradla' => '//*[@id="article"]/div[2]/p[1]'
    ]
];

$output_file = '/tmp/results.php';
file_put_contents($output_file, '');

$dom = new DOMDocument();

foreach ($sites as $restaurant => $site) {
    @$dom->loadHTML(file_get_contents($site));
    $xpath_obj = new DOMXPath($dom);

    append_result('V tovarni na jedlo s nazvom *' . $restaurant . "* maju dneska toto: \n");

    foreach ($xpaths[$restaurant] as $human_meal => $xpath) {
        append_result("\t$human_meal:\n");

        $oh_my_god_here_are_saved_meals = $xpath_obj->query($xpath);
        if ($oh_my_god_here_are_saved_meals === FALSE OR $oh_my_god_here_are_saved_meals->length === 0) {
            continue;
        }

        $found_meal = FALSE;
        foreach ($oh_my_god_here_are_saved_meals->item(0)->childNodes as $elem) {
            $meal_meal_meal = trim($elem->nodeValue);
            if (! empty($meal_meal_meal)) {
                append_result("\t\t$meal_meal_meal\n");
                $found_meal = TRUE;
            }
        }

        if (! $found_meal) {
            append_result("\tNevaria!!! 
Daj mi niekto gulomet, nech ich zabijem!\n");
        } else {
            append_result("\n");
        }
    }
}

append_result("\n\nMake me better on http://git.cinan.sk/obedparser.git/ :) \n");

function append_result($text) {
    global $output_file;
    file_put_contents($output_file, $text, FILE_APPEND | FILE_TEXT);
}
*/