Skip to content

Commit

Permalink
Update json ld scraper for basic bbcgoodfood.com support
Browse files Browse the repository at this point in the history
  • Loading branch information
ssnepenthe committed Feb 5, 2021
1 parent ccfbade commit 14948a2
Show file tree
Hide file tree
Showing 15 changed files with 1,258 additions and 10 deletions.
6 changes: 6 additions & 0 deletions src/Arr.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

class Arr
{
/**
 * Split a delimited string into a list of its parts.
 *
 * The parts are returned exactly as they appear between delimiters; no
 * trimming or filtering is applied.
 *
 * @param string $list      The delimited string to split.
 * @param string $delimiter Separator between items. Defaults to ', '.
 *
 * @return string[]
 */
public static function fromList(string $list, string $delimiter = ', ') : array
{
    // @todo Incredibly naive... Need to revisit with Str::isList().
    if ('' === $delimiter) {
        // explode() rejects an empty separator, so treat the input as a single item.
        return [$list];
    }

    return explode($delimiter, $list);
}

/**
* Adapted from illuminate/support.
*
Expand Down
14 changes: 12 additions & 2 deletions src/Scrapers/SchemaOrgJsonLd.php
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ protected function extractCategories(Crawler $crawler, array $json)
}

if (is_string($categories)) {
if (Str::isList($categories, ',')) {
return Arr::fromList($categories, ', ');
}

return [$categories];
}

Expand Down Expand Up @@ -370,7 +374,7 @@ protected function extractUrl(Crawler $crawler, array $json)
return $url;
}

return null;
return $this->extractString($crawler, '[rel="canonical"]', ['href']);
}

/**
Expand All @@ -380,10 +384,16 @@ protected function extractUrl(Crawler $crawler, array $json)
*/
protected function extractYield(Crawler $crawler, array $json)
{
    // Read the raw value once so each supported type can be checked in turn.
    $yield = Arr::get($json, 'recipeYield');

    if (is_string($yield)) {
        return $yield;
    }

    // recipeYield may be an integer rather than a string; cast it so the
    // return value is always either a string or null.
    if (is_int($yield)) {
        return (string) $yield;
    }

    return null;
}

Expand Down
22 changes: 14 additions & 8 deletions src/Str.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ public static function collapseWhitespace(string $string) : string
/**
 * Adapted from danielstjules/stringy.
 *
 * Converts HTML entities in the given string back to their corresponding
 * characters, using the current multibyte internal encoding.
 *
 * @param string $string The string to decode.
 * @param int    $flags  Flags passed through to html_entity_decode().
 *                       Defaults to ENT_QUOTES so that both double and
 *                       single quote entities are decoded.
 *
 * @return string
 */
public static function htmlDecode(string $string, int $flags = ENT_QUOTES) : string
{
    return html_entity_decode($string, $flags, mb_internal_encoding());
}

/**
 * Determine whether a string looks like a delimited list.
 *
 * @param string $string    The string to inspect.
 * @param string $delimiter Separator to look for. Defaults to ','.
 *
 * @return bool True when the delimiter occurs anywhere in the string.
 */
public static function isList(string $string, string $delimiter = ',') : bool
{
    // @todo Incredibly naive... Need to revisit at some point.
    if ('' === $delimiter) {
        // An empty delimiter cannot separate anything, and strpos() does not
        // handle an empty needle consistently across PHP versions.
        return false;
    }

    return false !== strpos($string, $delimiter);
}

/**
Expand All @@ -28,14 +34,14 @@ public static function lines(string $string)
return static::split($string, '[\r\n]{1,2}');
}

/**
 * Run a string through the standard clean-up pipeline.
 *
 * Applies, in order: htmlDecode(), tidy(), stripTags() and
 * collapseWhitespace(). Each step receives the output of the previous one.
 *
 * @param string $string The raw string to clean up.
 *
 * @return string
 */
public static function normalize(string $string) : string
{
    $string = static::htmlDecode($string);
    $string = static::tidy($string);
    $string = static::stripTags($string);
    $string = static::collapseWhitespace($string); // Also calls trim.

    return $string;
}

/**
Expand Down
22 changes: 22 additions & 0 deletions tests/Scrapers/WwwBbcGoodFoodComTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

namespace RecipeScraperTests\Scrapers;

use RecipeScraperTests\ScraperTestCase;
use RecipeScraper\Scrapers\SchemaOrgJsonLd;

/**
 * Scraper test case for www.bbcgoodfood.com, backed by the SchemaOrgJsonLd scraper.
 *
 * @group scraper
 */
class WwwBbcGoodFoodComTest extends ScraperTestCase
{
    /**
     * Host name this test case is bound to.
     */
    protected function getHost()
    {
        $host = 'www.bbcgoodfood.com';

        return $host;
    }

    /**
     * Build the scraper instance exercised by this test case.
     */
    protected function makeScraper()
    {
        $scraper = new SchemaOrgJsonLd();

        return $scraper;
    }
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

198 changes: 198 additions & 0 deletions tests/data/html/www.bbcgoodfood.com/recipes-overnight-oats

Large diffs are not rendered by default.

Large diffs are not rendered by default.

198 changes: 198 additions & 0 deletions tests/data/html/www.bbcgoodfood.com/recipes-spicy-sausage-noodles

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

return [
'author' => 'Esther Clark',
'categories' => [
'Dinner',
'Supper',
],
'cookingMethod' => null,
'cookTime' => 'PT10M',
'cuisines' => null,
'description' => 'Need something speedy for dinner? Try this chicken, coated in flavourful za\'atar and served with spiced chickpeas. It\'s simple, but seriously delicious',
'image' => 'https://images.immediate.co.uk/production/volatile/sites/30/2020/08/chicken-with-crushed-harissa-chickpeas-f009cb0.jpg',
'ingredients' => [
'2 tbsp rapeseed oil',
'1 onion, chopped',
'1 red pepper, finely sliced',
'1 yellow pepper, finely sliced',
'4 chicken breasts',
'1 tbsp za\'atar',
'400g can chickpeas',
'1½ tbsp red harissa paste',
'150g baby spinach',
'½ small bunch of parsley, finely chopped',
'lemonwedges, to serve',
],
'instructions' => [
'Heat 1 tbsp of oil in a frying pan over a medium heat and fry the onions and peppers for 7 mins until softened and golden.',
'Meanwhile, put the chicken between two sheets of baking parchment and lightly bash until about 2cm thick. Mix together the remaining oil and the za\'atar, then rub over the chicken. Season to taste.',
'Heat the grill to high. Put the chicken on a baking tray lined with foil, and grill for 3-4 mins each side, or until golden and cooked through.',
'Heat the chickpeas in a pan with the harissa paste and 2 tbsp water until warmed through, then roughly mash with a potato masher. Wilt the spinach in a pan with 1 tbsp of water or in the microwave in a heatproof bowl. Stir the pepper and onion mixture, spinach and parsley through the chickpeas. Serve with the sliced chicken and the lemon wedges for squeezing over.',
],
'name' => 'Chicken with crushed harissa chickpeas',
'notes' => null, // @todo
'prepTime' => 'PT5M',
'publisher' => 'BBC Good Food',
'totalTime' => 'PT15M',
'url' => 'https://www.bbcgoodfood.com/recipes/chicken-crushed-harissa-chickpeas',
'yield' => '4',
];
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

return [
'author' => 'Anna Glover',
'categories' => [
'Afternoon tea',
'Dessert',
'Treat',
],
'cookingMethod' => '',
'cookTime' => 'PT40M',
'cuisines' => null,
'description' => 'Who could resist our chocolate and raspberry cake? Like a Victoria sponge but better, try budget-friendly frozen raspberries for the cream',
'image' => 'https://images.immediate.co.uk/production/volatile/sites/30/2020/08/chocolate-raspberry-cake-a1392fd.jpg',
'ingredients' => [
'225ml sunflower oil, plus extra for the tins',
'250g caster sugar',
'3 large eggs',
'225ml milk',
'250g self-raising flour',
'4 tbsp cocoa',
'1½ tsp bicarbonate of soda',
'150g raspberry jam',
'100g frozen raspberries, defrosted',
'300ml double cream',
'2 tbsp icing sugar',
],
'instructions' => [
'Heat the oven to 180C/160C fan/gas 4. Oil and line two round 20cm springform cake tins with baking parchment. Whisk the oil, sugar, eggs and milk in a bowl until smooth. Sieve the flour, cocoa and bicarb into another large bowl, then gradually mix in the wet ingredients.',
'Divide the mixture between the tins and bake for 35-40 mins until the cakes are risen and spring back when pressed. Leave to cool in the tins for 10 mins, then transfer to a wire rack to cool completely.',
'For the raspberry layer, stir the jam and the defrosted raspberries together. Once the cakes are cool, whip the cream with the sugar to soft peaks, then gently fold half the raspberry mixture through the cream to create a ripple effect.',
'Spoon most of the reserved raspberry mixture over one of the cakes, then dollop on half of the cream. Smooth over with a palette knife, then place the other sponge on top. Swirl over the remaining cream and swirl the last of the raspberry mixture through it. Will keep in the fridge for two days.',
],
'name' => 'Chocolate & raspberry birthday layer cake',
'notes' => null, // @todo
'prepTime' => 'PT20M',
'publisher' => 'BBC Good Food',
'totalTime' => 'PT1H',
'url' => 'https://www.bbcgoodfood.com/recipes/chocolate-raspberry-birthday-layer-cake',
'yield' => '12',
];
33 changes: 33 additions & 0 deletions tests/data/results/www.bbcgoodfood.com/recipes-overnight-oats.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

return [
'author' => 'Sophie Godwin - Cookery writer',
'categories' => [
'Breakfast',
'Brunch',
],
'cookingMethod' => null,
'cookTime' => null,
'cuisines' => null,
'description' => 'Adapt this recipe for easy overnight oats to suit your tastes. You can add dried fruit, seeds and nuts, grated apple or pear, or chopped tropical fruits - the perfect healthy breakfast.',
'image' => 'https://images.immediate.co.uk/production/volatile/sites/30/2020/08/overnight-oats-32a2747.jpg',
'ingredients' => [
'¼ tsp ground cinnamon',
'50g rolled porridge oats',
'2 tbsp natural yogurt',
'50g mixed berries',
'drizzle of honey',
'½ tbsp nut butter(we used almond)',
],
'instructions' => [
'The night before serving, stir the cinnamon and 100ml water (or milk) into your oats with a pinch of salt.',
'The next day, loosen with a little more water (or milk) if needed. Top with the yogurt, berries, a drizzle of honey and the nut butter.',
],
'name' => 'Overnight oats',
'notes' => null,
'prepTime' => 'PT10M',
'publisher' => 'BBC Good Food',
'totalTime' => 'PT10M',
'url' => 'https://www.bbcgoodfood.com/recipes/overnight-oats',
'yield' => '1',
];
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

return [
'author' => 'Anna Glover',
'categories' => [
'Dinner',
'Main course',
],
'cookingMethod' => null,
'cookTime' => 'PT3H',
'cuisines' => null,
'description' => 'Warm up as the cold nights set in with this glorious hotpot, with slow-cooked pork cooked in cider and sage. It\'s topped with a crispy layer of potatoes',
'image' => 'https://images.immediate.co.uk/production/volatile/sites/30/2020/08/slow-cooked-pork-cider-sage-hotpot-f3f88ab.jpg',
'ingredients' => [
'4 tbsp olive oil,plus a little extra',
'1kg diced pork shoulder',
'20g butter,cubed, plus a little extra',
'4 leeks,trimmed and thickly sliced',
'4 garlic cloves,crushed',
'3 tbsp plain flour',
'500ml dry cider',
'400ml chicken stock',
'2 bay leaves',
'½ small bunch parsley,finely chopped',
'small bunch sage,leaves picked, 5 left whole, the rest chopped',
'200ml single cream',
'400g Maris Piper or King Edward potatoes',
'400g sweet potatoes',
],
'instructions' => [
'Heat half of the oil in a deep ovenproof frying pan, or flameproof casserole dish, and fry the pork pieces over a medium high heat in batches until seared all over, then transfer to a plate. Add another 1 tbsp oil to the pan, if you need to, while you\'re cooking the batches. Once all the pork is seared, transfer to a plate and set aside.',
'Add another 1 tbsp oil to the pan with a little butter and fry half the leeks with a pinch of salt for 10 mins until tender. Add the garlic, fry for a minute, then stir in the flour.',
'Pour in the cider, a little at a time, stirring to pick up any bits stuck to the bottom of the pan and to combine everything. Add the stock, bay leaves and seared pork, then simmer, half-covered with a lid for 1-1½ hrs until the meat is just tender (it will later cook to the point of falling apart in the oven). Can be prepared a day ahead.',
'Heat the oven to 200C/180C fan/gas 6. Simmer uncovered for a few minutes to reduce the sauce, if you need to - it shouldn\'t be too liquid or the potatoes will sink into the sauce. Stir in the parsley, chopped sage, remaining leeks, and the cream, then season well.',
'Peel both types of potatoes and cut into slices 2mm thick, by hand or using a mandoline. Alternate layers of potato and sweet potato in circles over the pie, or randomly, if you prefer. Dot the cubed butter over the top and bake for 1-1½ hrs until the potato is tender. Nestle in the whole sage leaves, brushed in a little oil, for the last 10 mins. Leave to rest for 10 mins before serving.',
],
'name' => 'Slow-cooked pork, cider & sage hotpot',
'notes' => null, // @todo
'prepTime' => 'PT40M',
'publisher' => 'BBC Good Food',
'totalTime' => 'PT3H40M',
'url' => 'https://www.bbcgoodfood.com/recipes/slow-cooked-pork-cider-sage-hotpot',
'yield' => '6',
];
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

return [
'author' => 'Elena Silcock',
'categories' => [
'Dinner',
'Lunch',
'Pasta',
'Supper',
],
'cookingMethod' => null,
'cookTime' => 'PT10M',
'cuisines' => null,
'description' => 'Add sausages and green beans to noodles for this speedy stir-fry supper. Flavoured with ginger, chilli and garlic, it takes just 15 minutes from prep to plate',
'image' => 'https://images.immediate.co.uk/production/volatile/sites/30/2020/08/spicy-sausage-noodle-bowl-v2-ade9ec1.jpg',
'ingredients' => [
'2 sausages',
'1 garlic clove, crushed',
'1 tbsp grated ginger',
'1 red chilli, chopped',
'1 tbsp sesame oil',
'160g green beans',
'200g straight-to-wok noodles',
'soy sauce',
'fresh coriander leaves',
'sesame seeds',
],
'instructions' => [
'Squeeze the sausages out of their skins. Mix the sausagemeat with the garlic, ginger and red chilli, then fry in the sesame oil until browned. Add the green beans, then fry for 1 min more.',
'Tip in the straight-to-wok noodles and a splash of soy sauce, and toss together. Add a splash of water and put the lid on to allow the beans to steam for a couple of mins. Sprinkle with fresh coriander leaves and sesame seeds to serve.',
],
'name' => 'Spicy sausage noodles',
'notes' => null,
'prepTime' => 'PT5M',
'publisher' => 'BBC Good Food',
'totalTime' => 'PT15M',
'url' => 'https://www.bbcgoodfood.com/recipes/spicy-sausage-noodles',
'yield' => '2',
];
9 changes: 9 additions & 0 deletions tests/data/urls/www.bbcgoodfood.com.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?php

return array (
0 => 'https://www.bbcgoodfood.com/recipes/overnight-oats',
1 => 'https://www.bbcgoodfood.com/recipes/slow-cooked-pork-cider-sage-hotpot',
2 => 'https://www.bbcgoodfood.com/recipes/chocolate-raspberry-birthday-layer-cake',
3 => 'https://www.bbcgoodfood.com/recipes/spicy-sausage-noodles',
4 => 'https://www.bbcgoodfood.com/recipes/chicken-crushed-harissa-chickpeas',
);

0 comments on commit 14948a2

Please sign in to comment.