Skip to content

Commit

Permalink
Import test content from live site via REST (WordPress#249)
Browse files Browse the repository at this point in the history
This adds a new import script that fetches workshop and lesson plan content from the live site for local development and testing. It relies on a server-side plugin that makes raw block content publicly available via the REST API.

Currently, it will fetch the most recent 50 `lesson-plan` and `wporg_workshop` posts. That could easily be extended to include other CPTs as needed.

There is some server-side code that will only allow exporting posts that use a limited set of blocks, in order to limit the potential for disclosing data. That set of blocks can also be tweaked as needed.
  • Loading branch information
tellyworth authored Jul 25, 2022
1 parent 6cdd7e0 commit 00ed699
Show file tree
Hide file tree
Showing 5 changed files with 247 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .wp-env.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"wp-content": "./wp-content",
"wp-content/mu-plugins/0-sandbox.php": "./.wp-env/0-sandbox.php",
"wp-content/uploads/wporg_events.sql": "./.wp-env/wporg_events.sql",
"wp-content/uploads/wporg_locales.sql": "./.wp-env/wporg_locales.sql"
"wp-content/uploads/wporg_locales.sql": "./.wp-env/wporg_locales.sql",
"bin": "./bin"
}
}
91 changes: 91 additions & 0 deletions bin/import-test-content.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/php
<?php

namespace WPOrg_Learn\Bin\ImportTestContent;

/**
* CLI script for generating local test content, fetched from the live learn.wordpress.org site.
*
* This needs to be run in a wp-env, for example:
*
* yarn run wp-env run cli "php bin/import-test-content.php"
*/

// This script should only be called in a CLI environment.
// Strict comparison: php_sapi_name() returns a string, so no type juggling is wanted here.
if ( 'cli' !== php_sapi_name() ) {
	die();
}

// Command-line options are parsed here; nothing else in this script currently reads $opts.
$opts = getopt( '', array( 'post:', 'url:', 'abspath:', 'age:' ) );

// Bootstrap WordPress from the directory one level above the one containing this script.
require dirname( dirname( __FILE__ ) ) . '/wp-load.php';

// Refuse to insert test content anywhere except a local development environment.
if ( 'local' !== wp_get_environment_type() ) {
	die( 'Not safe to run on ' . esc_html( get_site_url() ) );
}

/**
 * Sanitize postmeta from the REST API for the format required by wp_insert_post.
 *
 * Each top-level meta value that is an array is flattened to a comma-separated string;
 * every other value is passed through unchanged.
 *
 * @param object|array $meta Post meta as decoded from the REST response.
 *
 * @return array An array suitable for meta_input, keyed by meta key.
 */
function sanitize_meta_input( $meta ) {
	// Cast, don't wrap: array( $meta ) would yield a single element holding the entire
	// meta object, so no individual meta key would ever be visited below.
	$meta = (array) $meta;

	foreach ( $meta as $k => $v ) {
		if ( is_array( $v ) ) {
			$meta[ $k ] = implode( ',', $v );
		}
	}

	return $meta;
}

/**
 * Import posts from a remote REST API to the local test site.
 *
 * Fetches the given endpoint, decodes the JSON list of posts, and inserts each one locally
 * with wp_insert_post(), requesting the remote post ID via `import_id`. The script is
 * terminated with a message on any transport, HTTP, decoding, or insertion failure.
 *
 * @param string $rest_url The remote REST API endpoint URL.
 */
function import_rest_to_posts( $rest_url ) {
	$response = wp_remote_get( $rest_url );

	// Check for a transport-level failure before reading anything from the response.
	if ( is_wp_error( $response ) ) {
		die( esc_html( $response->get_error_message() ) );
	}

	$status_code = wp_remote_retrieve_response_code( $response );
	if ( 200 !== $status_code ) {
		die( esc_html( "HTTP Error $status_code \n" ) );
	}

	$data = json_decode( wp_remote_retrieve_body( $response ) );

	// A collection endpoint should decode to a list; anything else (including invalid JSON,
	// which decodes to null) cannot be iterated as posts.
	if ( ! is_array( $data ) ) {
		die( esc_html( "Unexpected response body from $rest_url\n" ) );
	}

	foreach ( $data as $post ) {
		echo esc_html( "Got {$post->type} {$post->id} {$post->slug}\n" );

		// Map the REST representation onto the argument names wp_insert_post() expects.
		// Fields that a post type may not expose fall back to neutral defaults.
		$newpost = array(
			'import_id'      => $post->id,
			'post_date'      => gmdate( 'Y-m-d H:i:s', strtotime( $post->date ) ),
			'post_name'      => $post->slug,
			'post_status'    => $post->status,
			'post_type'      => $post->type,
			'post_title'     => $post->title->rendered,
			// Prefer the raw block markup when the server exposes it.
			'post_content'   => ( $post->content_raw ?? $post->content->rendered ),
			'post_parent'    => ( $post->parent ?? 0 ),
			'comment_status' => ( $post->comment_status ?? '' ),
			'meta_input'     => sanitize_meta_input( $post->meta ?? array() ),
		);

		$new_post_id = wp_insert_post( $newpost, true );

		if ( is_wp_error( $new_post_id ) ) {
			die( esc_html( $new_post_id->get_error_message() ) );
		}

		echo esc_html( "Inserted $post->type $post->id as $new_post_id\n" );
	}
}

// The context must match the one registered by the server-side export plugin ('wporg_export');
// that is the only context in which the raw `content_raw` field is exposed.
import_rest_to_posts( 'https://learn.wordpress.org/wp-json/wp/v2/wporg_workshop?context=wporg_export&per_page=50' );
import_rest_to_posts( 'https://learn.wordpress.org/wp-json/wp/v2/lesson-plan?context=wporg_export&per_page=50' );
3 changes: 3 additions & 0 deletions bin/index.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ npm run wp-env run cli wp rewrite structure '/%postname%/'
# Import tables
npm run wp-env run cli wp db import wp-content/uploads/wporg_events.sql
npm run wp-env run cli wp db import wp-content/uploads/wporg_locales.sql

# Import content
npm run wp-env run cli "php bin/import-test-content.php"
150 changes: 150 additions & 0 deletions wp-content/plugins/wporg-learn/inc/export.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
<?php

/**
* Allow some raw data to be exposed in the REST API for certain post types, so that developers can import
* a copy of production data for local testing.
*
* ⚠️ Be careful to only expose public data!
*/

namespace WPOrg_Learn\Export;

// Bail out unless WPINC is defined.
if ( ! defined( 'WPINC' ) ) {
	die();
}

add_action(
	'rest_api_init',
	function() {
		// Important: raw post content is exposed only for the post types listed here.
		foreach ( array( 'lesson-plan', 'wporg_workshop' ) as $exportable_post_type ) {
			register_raw_content_for_post_type( $exportable_post_type );
		}
	}
);

/**
 * Enable the `wporg_export` context for a single post type.
 *
 * Registers a `content_raw` REST field limited to the `wporg_export` context, and hooks the
 * post type's item schema filter so the schema can be extended with that same context.
 *
 * @param string $post_type Post type to allow for export.
 */
function register_raw_content_for_post_type( $post_type ) {
	$field_schema = array(
		'type'    => 'string',
		'context' => array( 'wporg_export' ),
	);

	$field_args = array(
		'get_callback' => __NAMESPACE__ . '\show_post_content_raw',
		'schema'       => $field_schema,
	);

	register_rest_field( $post_type, 'content_raw', $field_args );

	add_filter( "rest_{$post_type}_item_schema", __NAMESPACE__ . '\add_export_context_to_schema' );
}

/**
 * Filter callback for a CPT item schema: every schema entry whose context list includes
 * 'view' also gets the 'wporg_export' context.
 *
 * @param array $schema The schema object.
 *
 * @return array The schema with the extra context applied.
 */
function add_export_context_to_schema( $schema ) {
	// Work on a local copy; the recursive helper modifies its argument in place.
	$schema_with_export = $schema;
	update_schema_array_recursive( $schema_with_export );

	return $schema_with_export;
}

/**
 * Find every item in the schema that has a 'view' context, and add a 'wporg_export' context to it.
 * Had to use a recursive function because array_walk_recursive only walks leaf nodes.
 *
 * The schema is modified in place. Running the function again on an already-updated schema
 * does not append the context a second time.
 *
 * @param array $schema The schema object, passed by reference.
 */
function update_schema_array_recursive( &$schema ) {
	foreach ( $schema as $key => &$value ) {
		// Only arrays can hold nested schema nodes or a context list; a scalar stored under
		// a 'context' key must not reach in_array(), which rejects non-array haystacks.
		if ( ! is_array( $value ) ) {
			continue;
		}

		// Head recursion: descend first, then handle this node.
		update_schema_array_recursive( $value );

		if (
			'context' === $key
			&& in_array( 'view', $value, true )
			&& ! in_array( 'wporg_export', $value, true )
		) {
			$value[] = 'wporg_export';
		}
	}

	// Break the reference so later writes to $value cannot alter the last element.
	unset( $value );
}

/**
 * Collect the distinct block names found in a list of blocks, including nested inner blocks.
 *
 * Blocks whose name is null are skipped, along with anything nested inside them.
 *
 * @param array $blocks An array of blocks.
 * @return array An array of block names, de-duplicated with array_unique().
 */
function get_all_block_names( $blocks ) {
	if ( ! $blocks ) {
		return array();
	}

	$names = array();

	foreach ( $blocks as $block ) {
		if ( null === $block['blockName'] ) {
			continue;
		}

		$names[] = $block['blockName'];

		if ( ! $block['innerBlocks'] ) {
			continue;
		}

		// Recurse into the nested blocks and append whatever names they contain.
		foreach ( get_all_block_names( $block['innerBlocks'] ) as $inner_name ) {
			$names[] = $inner_name;
		}
	}

	return array_unique( $names );
}

/**
 * Callback: If a post contains only allowed blocks, then return the raw block markup for the post.
 *
 * @param array  $object     The post data for the REST response; its 'id' entry selects the post.
 * @param string $field_name The field name (unused here).
 * @param mixed  $request    The request object (unused here).
 *
 * @return string The raw post content if it contains only allowed blocks; a placeholder string
 *                naming the first disallowed block otherwise; an empty string if no post is found.
 */
function show_post_content_raw( $object, $field_name, $request ) {

	/**
	 * Filter: Modify the list of blocks permitted in posts available via the 'wporg_export' context.
	 * Posts containing any other blocks will not be exported.
	 *
	 * @param array $allowed_blocks An array of allowed block names. Simple wildcards are permitted, like 'core/*'.
	 */
	$allowed_blocks = apply_filters( 'allow_raw_block_export', array(
		'core/*',
		'wporg/*',
		// other allowed blocks:
		'jetpack/image-compare',
		'jetpack/tiled-gallery',
		'syntaxhighlighter/code',
	) );

	if ( ! empty( $object['id'] ) ) {
		$post = get_post( $object['id'] );
	} else {
		$post = get_post();
	}

	// Without a post there is nothing to export; bail before touching its properties.
	if ( ! $post ) {
		return '';
	}

	// Exit early if the post contains any blocks that are not explicitly allowed.
	if ( has_blocks( $post->post_content ) ) {

		// Turn each allowed name into a regex fragment, expanding the '*' wildcard to '.*'.
		$regexes = array();
		foreach ( $allowed_blocks as $allowed_block_name ) {
			$regexes[] = strtr( preg_quote( $allowed_block_name, '#' ), array( '\*' => '.*' ) );
		}

		$regex = '#^(' . implode( '|', $regexes ) . ')$#';

		$blocks      = parse_blocks( $post->post_content );
		$block_names = get_all_block_names( $blocks );

		foreach ( $block_names as $block_name ) {
			// If it contains a disallowed block, then return no content.
			// Better to raise an error instead?
			if ( ! preg_match( $regex, $block_name ) ) {
				return '<p>Post contains a disallowed block ' . esc_html( $block_name ) . '</p>';
			}
		}
	}

	return $post->post_content;
}
1 change: 1 addition & 0 deletions wp-content/plugins/wporg-learn/wporg-learn.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ function load_files() {
require_once get_includes_path() . 'profiles.php';
require_once get_includes_path() . 'sensei.php';
require_once get_includes_path() . 'taxonomy.php';
require_once get_includes_path() . 'export.php';
}

/**
Expand Down

0 comments on commit 00ed699

Please sign in to comment.