diff --git a/.eslintrc.json b/.eslintrc.json index 85107470a..4ca16399d 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -22,6 +22,7 @@ "requestAnimationFrame": "readonly", "React": "readonly", "Block": "readonly", + "classifai_term_cleanup_params": "readonly", "classifAISettings": "readonly" }, "rules": { diff --git a/README.md b/README.md index 882281d54..acf3e66e7 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) * Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress) +* Find similar terms to merge together using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress). 
Note this only compares top-level terms and if you merge a term that has children, these become top-level terms as per default WordPress behavior * BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) *(note that this service has been [deprecated by Microsoft](https://learn.microsoft.com/en-us/azure/ai-services/personalizer/) and as such, will no longer work. We are looking to replace this with a new provider to maintain the same functionality (see [issue#392](https://github.com/10up/classifai/issues/392))* * Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) * Scan images and PDF files for embedded text and save for use in post meta using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) @@ -56,7 +57,8 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://aka.ms/oai/access) for OpenAI access. * To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account. * To utilize the AWS Language Processing functionality, you will need an active [AWS](https://console.aws.amazon.com/) account. -* To utilize the Smart 404 feature, you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. 
+* To utilize the Smart 404 feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access and you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. +* To utilize the Term Cleanup feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access. For better performance, you will need [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. ## Pricing @@ -561,6 +563,46 @@ docker run -p 9200:9200 -d --name elasticsearch \ This will download, install and start Elasticsearch v7.9.0 to your local machine. You can then access Elasticsearch at `http://localhost:9200`, which is the same URL you can use to configure ElasticPress with. It is recommended that you change the `Content Items per Index Cycle` setting in ElasticPress to `20` to ensure indexing doesn't timeout. Also be aware of API rate limits on the OpenAI Embeddings API. +## Set Up the Term Cleanup Feature + +### 1. Decide on Provider + +* This Feature is powered by either OpenAI or Azure OpenAI. +* Once you've chosen a Provider, you'll need to create an account and get authentication details. + * When setting things up on the Azure side, ensure you choose either the `text-embedding-3-small` or `text-embedding-3-large` model. The Feature will not work with other models. + +### 2. Configure Settings under Tools > ClassifAI > Language Processing > Term Cleanup + +* Select the proper Provider in the provider dropdown. +* Enter your authentication details. +* Configure any other settings as desired. + +### 3. 
ElasticPress configuration + +It is recommended to use ElasticPress with this Feature, especially if processing more than 500 terms, as performance will be significantly better. Once the Term Cleanup Feature is configured, you can then proceed to get ElasticPress set up to index the data. + +If on a standard WordPress installation: + +* Install and activate the [ElasticPress](https://github.com/10up/elasticpress) plugin. +* Set your Elasticsearch URL in the ElasticPress settings (`ElasticPress > Settings`). +* Enable the [term index](https://www.elasticpress.io/blog/2023/03/enabling-comments-and-terms-in-elasticpress-5-0/) feature. +* Go to the `ElasticPress > Sync` settings page and trigger a sync, ensuring this is set to run a sync from scratch. This will send over the new schema to Elasticsearch and index all content, including creating vector embeddings for each term. + +If on a WordPress VIP hosted environment: + +* [Enable Enterprise Search](https://docs.wpvip.com/enterprise-search/enable/). +* [Enable the term index](https://docs.wpvip.com/enterprise-search/enable-features/#h-terms). Example command: `vip @example-app.develop -- wp vip-search activate-feature terms`. +* [Run the VIP-CLI `index` command](https://docs.wpvip.com/enterprise-search/index/). This sends the new schema to Elasticsearch and indexes all content, including creating vector embeddings for each term. Note you may need to use the `--setup` flag to ensure the schema is created correctly. + +### 4. Start the Term Cleanup Process + +Once configured, the plugin will add a new submenu under the Tools menu called Term Cleanup. + +* Go to the Term Cleanup page, click on your desired taxonomy, then click on the "Find similar" button. +* This initializes a background process that will compare each term to find ones that are similar. +* Once done, all the results will be displayed. +* You can then skip or merge the potential duplicate terms from the settings page. 
+ ## Set Up Image Processing features (via Microsoft Azure) Note that [Azure AI Vision](https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/home#image-requirements) can analyze and crop images that meet the following requirements: diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php new file mode 100644 index 000000000..57c74eccb --- /dev/null +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -0,0 +1,294 @@ +taxonomy = $taxonomy; + + // Set parent defaults. + parent::__construct( + array( + 'singular' => 'similar_term', + 'plural' => 'similar_terms', + 'ajax' => false, + ) + ); + } + + /** + * Gets the list of columns. + * + * @return string[] Array of column titles keyed by their column name. + */ + public function get_columns() { + $tax = get_taxonomy( $this->taxonomy ); + $labels = get_taxonomy_labels( $tax ); + $label = $labels->singular_name ?? __( 'Term', 'classifai' ); + + return array( + 'term' => $label, + // translators: %s: Singular label of the taxonomy. + 'similar_term' => sprintf( __( 'Similar %s', 'classifai' ), $label ), + 'actions' => __( 'Action', 'classifai' ), + ); + } + + /** + * Prepares the list of items for displaying. + */ + public function prepare_items() { + $per_page = $this->get_items_per_page( 'edit_post_per_page' ); + $columns = $this->get_columns(); + $hidden = array(); + $sortable = $this->get_sortable_columns(); + $search = isset( $_REQUEST['s'] ) ? 
sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : ''; // phpcs:ignore WordPress.Security.NonceVerification.Recommended + + $this->_column_headers = array( $columns, $hidden, $sortable ); + + $total = wp_count_terms( + [ + 'taxonomy' => $this->taxonomy, + 'hide_empty' => false, + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + 'search' => $search, + ] + ); + + $this->set_pagination_args( + array( + 'total_items' => $total, // WE have to calculate the total number of items. + 'per_page' => $per_page, // WE have to determine how many items to show on a page. + 'total_pages' => ceil( $total / $per_page ), // WE have to calculate the total number of pages. + ) + ); + + $current = $this->get_pagenum(); + $offset = ( $current - 1 ) * $per_page; + + $terms = get_terms( + [ + 'taxonomy' => $this->taxonomy, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + 'number' => $per_page, + 'offset' => $offset, + 'search' => $search, + ] + ); + + $items = []; + + foreach ( $terms as $term_id ) { + $similar_terms = get_term_meta( $term_id, 'classifai_similar_terms', true ); + + if ( ! $similar_terms ) { + continue; + } + + foreach ( $similar_terms as $k => $v ) { + $similar_term = get_term( $k ); + if ( $similar_term ) { + $items[] = [ + 'term' => get_term( $term_id ), + 'similar_term' => $similar_term, + 'score' => $v, + ]; + } else { + unset( $similar_terms[ $k ] ); + update_term_meta( $term_id, 'classifai_similar_terms', $similar_terms ); + } + } + + if ( empty( $similar_terms ) ) { + delete_term_meta( $term_id, 'classifai_similar_terms' ); + } + } + + $this->items = $items; + } + + /** + * Generate term html to show it in Similar terms list table + * + * @param WP_Term $term Term Object. 
+ * @param WP_Term $similar_term Similar Term Object. + * @param float $score Similarity score. + * @return string + */ + public function generate_term_html( $term, $similar_term, $score = null ) { + $args = array( + 'action' => 'classifai_merge_term', + 'taxonomy' => $this->taxonomy, + 'from' => $similar_term->term_id, + 'to' => $term->term_id, + 'paged' => $this->get_pagenum(), + 's' => isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, // phpcs:ignore WordPress.Security.NonceVerification.Recommended + ); + $merge_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_merge_term' ) ); + $score = $score ? ( $score > 1 ? $score - 1 : $score ) : ''; + + return sprintf( + // translators: %s: Term name, %d: Term ID. + __( '%1$s (ID: %2$s)

', 'classifai' ) . + // translators: %s: Term slug. + __( 'Slug: %3$s
', 'classifai' ) . + // translators: %s: Term count. + __( 'Used: %4$s
', 'classifai' ) . + // translators: %s: Term parent name. + __( 'Parent: %5$s
', 'classifai' ) . + // translators: %s: Similarity score. + ( $score ? __( 'Similarity: %6$s
', 'classifai' ) : '%6$s' ) . + '%8$s', + esc_html( $term->name ), + '' . esc_html( $term->term_id ) . '', + esc_html( $term->slug ), + // translators: %d: Term count. + '' . esc_html( sprintf( _n( '%d time', '%d times', $term->count, 'classifai' ), $term->count ) ) . '', + esc_html( $term->parent > 0 ? get_term( $term->parent )->name : 'None' ), + $score ? esc_html( round( $score * 100, 2 ) . '%' ) : '', + esc_url( $merge_url ), + esc_html__( 'Merge and keep this', 'classifai' ) + ); + } + + /** + * Handles the term column output. + * + * @param array $item The current term item. + */ + public function column_term( $item ) { + $term = $item['term']; + $similar_term = $item['similar_term']; + $this->last_item_id = $term->term_id; + + return $this->generate_term_html( $term, $similar_term ); + } + + /** + * Handles the similar term column output. + * + * @param array $item The current term item. + */ + public function column_similar_term( $item ) { + $term = $item['term']; + $similar_term = $item['similar_term']; + + return $this->generate_term_html( $similar_term, $term, $item['score'] ); + } + + /** + * Handles the term actions output. + * + * @param array $item The current term item. + */ + public function column_actions( $item ) { + $term = $item['term']; + $similar_term = $item['similar_term']; + + $args = array( + 'action' => 'classifai_skip_similar_term', + 'taxonomy' => $this->taxonomy, + 'term' => $term->term_id, + 'similar_term' => $similar_term->term_id, + 'paged' => $this->get_pagenum(), + 's' => isset( $_REQUEST['s'] ) ? 
sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, // phpcs:ignore WordPress.Security.NonceVerification.Recommended + ); + $skip_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_skip_similar_term' ) ); + + return sprintf( + "%s", + esc_url( $skip_url ), + esc_html__( 'Skip', 'classifai' ) + ); + } + + /** + * Generates content for a single row of the table + * + * @param array $item The current item. + * @param string $column_name The current column name. + */ + protected function column_default( $item, $column_name ) { + return esc_html( $item[ $column_name ] ); + } + + /** + * Generates custom table navigation to prevent conflicting nonces. + * + * @param string $which The location of the bulk actions: Either 'top' or 'bottom'. + */ + protected function display_tablenav( $which ) { + ?> +
+
+ bulk_actions( $which ); ?> +
+ extra_tablenav( $which ); + $this->pagination( $which ); + ?> +
+
+ last_item_id === $term->term_id ) { + $class .= ' skip'; + } + + echo ''; + $this->single_row_columns( $item ); + echo ''; + } +} diff --git a/includes/Classifai/Admin/templates/classifai-header.php b/includes/Classifai/Admin/templates/classifai-header.php index 426b53476..b3a54a014 100644 --- a/includes/Classifai/Admin/templates/classifai-header.php +++ b/includes/Classifai/Admin/templates/classifai-header.php @@ -10,7 +10,7 @@ // phpcs:ignore WordPress.Security.NonceVerification.Recommended $active_page = isset( $_GET['tab'] ) ? sanitize_text_field( wp_unslash( $_GET['tab'] ) ) : 'classifai_settings'; // phpcs:ignore WordPress.Security.NonceVerification.Recommended -$is_setup_page = isset( $_GET['page'] ) && 'classifai_setup' === sanitize_text_field( wp_unslash( $_GET['page'] ) ); +$is_setup_page = isset( $_GET['page'] ) && ( 'classifai_setup' === sanitize_text_field( wp_unslash( $_GET['page'] ) ) || 'classifai-term-cleanup' === sanitize_text_field( wp_unslash( $_GET['page'] ) ) ); ?>
diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php new file mode 100644 index 000000000..106e9935c --- /dev/null +++ b/includes/Classifai/Features/TermCleanup.php @@ -0,0 +1,1153 @@ +label = __( 'Term Cleanup', 'classifai' ); + + // Contains all providers that are registered to the service. + $this->provider_instances = $this->get_provider_instances( LanguageProcessing::get_service_providers() ); + + // Contains just the providers this feature supports. + $this->supported_providers = [ + OpenAIEmbeddings::ID => __( 'OpenAI Embeddings', 'classifai' ), + AzureEmbeddings::ID => __( 'Azure OpenAI Embeddings', 'classifai' ), + ]; + } + + /** + * Set up necessary hooks. + * + * This will always fire even if the Feature is not enabled. + */ + public function setup() { + parent::setup(); + + if ( $this->is_configured() && $this->is_enabled() ) { + // Check if ElasticPress plugin is installed and use EP selected. + if ( is_elasticpress_installed() && '1' === $this->get_settings( 'use_ep' ) ) { + $this->ep_integration = new TermCleanupEPIntegration( $this ); + $this->ep_integration->init(); + } + } + + $this->setting_page_url = admin_url( 'tools.php?page=classifai-term-cleanup' ); + + $this->background_process = new TermCleanupScheduler( 'classifai_schedule_term_cleanup_job' ); + $this->background_process->init(); + } + + /** + * Set up necessary hooks. + * + * This will only fire if the Feature is enabled. + */ + public function feature_setup() { + add_action( 'admin_enqueue_scripts', [ $this, 'enqueue_admin_assets' ] ); + + // Register the settings page for the Feature. 
+ add_action( 'admin_menu', [ $this, 'register_admin_menu_item' ] ); + add_action( 'admin_post_classifai_init_term_cleanup', [ $this, 'start_term_cleanup_process' ] ); + add_action( 'admin_post_classifai_cancel_term_cleanup', [ $this, 'cancel_term_cleanup_process' ] ); + add_action( 'admin_post_classifai_merge_term', [ $this, 'merge_term' ] ); + add_action( 'admin_post_classifai_skip_similar_term', [ $this, 'skip_similar_term' ] ); + + // Ajax action handler + add_action( 'wp_ajax_classifai_get_term_cleanup_status', [ $this, 'get_term_cleanup_status' ] ); + + // Admin notices + add_action( 'admin_notices', [ $this, 'render_notices' ] ); + } + + /** + * Enqueue the admin scripts. + * + * @param string $hook_suffix The current admin page. + */ + public function enqueue_admin_assets( string $hook_suffix ) { + if ( 'tools_page_classifai-term-cleanup' !== $hook_suffix ) { + return; + } + + wp_localize_script( + 'classifai-admin-script', + 'classifai_term_cleanup_params', + array( + 'ajax_url' => esc_url( admin_url( 'admin-ajax.php' ) ), + 'ajax_nonce' => wp_create_nonce( 'classifai-term-cleanup-status' ), + ) + ); + } + + /** + * Register a sub page under the Tools menu. + */ + public function register_admin_menu_item() { + // Don't register the menu if no taxonomies are enabled. + if ( empty( $this->get_all_feature_taxonomies() ) ) { + return; + } + + add_submenu_page( + 'tools.php', + __( 'Term Cleanup', 'classifai' ), + __( 'Term Cleanup', 'classifai' ), + 'manage_options', + 'classifai-term-cleanup', + [ $this, 'render_settings_page' ] + ); + } + + /** + * Render the settings page for the Term Cleanup Feature. + */ + public function render_settings_page() { + $active_tax = isset( $_GET['tax'] ) ? sanitize_text_field( wp_unslash( $_GET['tax'] ) ) : ''; // phpcs:ignore WordPress.Security.NonceVerification.Recommended + $all_taxonomies = $this->get_taxonomies(); + $taxonomies = $this->get_all_feature_taxonomies(); + ?> + +
+ +

+ +

+
+ + +
+

+ +
+
+

+ background_process && $this->background_process->in_progress() ) { + $this->render_background_processing_status( $active_tax ); + } else { + $plural_label = strtolower( $this->get_taxonomy_label( $active_tax, true ) ); + $singular_label = strtolower( $this->get_taxonomy_label( $active_tax, false ) ); + + // translators: %s: Taxonomy name. + $submit_label = sprintf( __( 'Find similar %s', 'classifai' ), esc_attr( $plural_label ) ); + ?> +

+ +

+
+
+ + + + +
+
+ +
+
+ render_similar_terms( $active_tax ); + ?> +
+
+
+
+
+ get_taxonomies(); + + foreach ( $taxonomies as $name => $label ) { + if ( 'category' === $name ) { + $tax_settings[ $name ] = 1; + } else { + $tax_settings[ $name ] = 0; + } + + $tax_settings[ "{$name}_threshold" ] = 75; + } + + $settings = [ + 'provider' => OpenAIEmbeddings::ID, + 'use_ep' => is_elasticpress_installed() ? 1 : 0, + 'taxonomies' => $tax_settings, + ]; + + return $settings; + } + + /** + * Sanitizes the default feature settings. + * + * @param array $new_settings Settings being saved. + * @return array + */ + public function sanitize_default_feature_settings( array $new_settings ): array { + if ( empty( $new_settings['use_ep'] ) || 1 !== (int) $new_settings['use_ep'] ) { + $new_settings['use_ep'] = 'no'; + } else { + $new_settings['use_ep'] = '1'; + } + + return $new_settings; + } + + /** + * Get meta key for embeddings. + * + * @return string + */ + public function get_embeddings_meta_key(): string { + $provider = $this->get_feature_provider_instance(); + $meta_key = 'classifai_openai_embeddings'; + + if ( $provider instanceof AzureEmbeddings ) { + $meta_key = 'classifai_azure_openai_embeddings'; + } + + /** + * Filter the meta key for embeddings. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_embeddings_meta_key + * + * @param {string} $meta_key Meta key for embeddings. + * @param {TermCleanup} $this Feature instance. + * + * @return {string} Meta key for embeddings. + */ + return apply_filters( 'classifai_' . static::ID . '_embeddings_meta_key', $meta_key, $this ); + } + + /** + * Get all feature taxonomies. 
+	 *
+	 * Returns the names of the taxonomies reported by get_taxonomies()
+	 * whose entry in this Feature's `taxonomies` setting is truthy.
+	 *
+	 * @return array
+	 */
+	public function get_all_feature_taxonomies(): array {
+		$taxonomies = $this->get_taxonomies();
+		$settings   = $this->get_settings( 'taxonomies' );
+
+		$enabled_taxonomies = [];
+		foreach ( $taxonomies as $name => $label ) {
+			if ( isset( $settings[ $name ] ) && (bool) $settings[ $name ] ) {
+				$enabled_taxonomies[] = $name;
+			}
+		}
+
+		return $enabled_taxonomies;
+	}
+
+	/**
+	 * Start the term cleanup process.
+	 *
+	 * Handler for the `classifai_init_term_cleanup` admin-post action.
+	 * Verifies the request, clears any `classifai_similar_terms` meta
+	 * previously stored for the taxonomy, schedules a background job
+	 * and redirects back to the settings page.
+	 */
+	public function start_term_cleanup_process() {
+		if (
+			empty( $_POST['classifai_term_cleanup_nonce'] ) ||
+			! wp_verify_nonce( sanitize_text_field( wp_unslash( $_POST['classifai_term_cleanup_nonce'] ) ), 'classifai_term_cleanup' )
+		) {
+			wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) );
+		}
+
+		// A nonce proves intent, not authorization. Require the same
+		// capability the Term Cleanup menu page is registered with.
+		if ( ! current_user_can( 'manage_options' ) || ! $this->is_feature_enabled() ) {
+			wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) );
+		}
+
+		$taxonomy = isset( $_POST['classifai_term_cleanup_taxonomy'] ) ? sanitize_text_field( wp_unslash( $_POST['classifai_term_cleanup_taxonomy'] ) ) : '';
+
+		// Reject unknown taxonomies up front; get_terms() would return a WP_Error for them.
+		if ( empty( $taxonomy ) || ! taxonomy_exists( $taxonomy ) ) {
+			wp_die( esc_html__( 'Invalid taxonomy.', 'classifai' ) );
+		}
+
+		$settings = $this->get_settings( 'taxonomies' );
+		$thresold = isset( $settings[ $taxonomy . '_threshold' ] ) ? absint( $settings[ $taxonomy . '_threshold' ] ) : 75;
+
+		// Clear previously found similar terms.
+		$args = [
+			'taxonomy'     => $taxonomy,
+			'hide_empty'   => false,
+			'fields'       => 'ids',
+			'meta_key'     => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
+			'meta_compare' => 'EXISTS',
+		];
+
+		$terms = get_terms( $args );
+
+		if ( ! is_wp_error( $terms ) && ! empty( $terms ) ) {
+			foreach ( $terms as $term_id ) {
+				delete_term_meta( $term_id, 'classifai_similar_terms' );
+			}
+		}
+
+		// Note: the `thresold` key spelling is kept as-is because the job
+		// arguments are consumed outside of this method.
+		$job_args = [
+			[
+				'taxonomy'             => $taxonomy,
+				'thresold'             => $thresold,
+				'action'               => 'term_cleanup',
+				'embeddings_generated' => false,
+				'processed'            => 0,
+				'term_id'              => 0,
+				'offset'               => 0,
+				'started_by'           => get_current_user_id(),
+				'job_id'               => str_replace( '-', '', wp_generate_uuid4() ),
+			],
+		];
+
+		$this->background_process->schedule( $job_args );
+
+		$this->add_notice(
+			__( 'Process for finding similar terms has started.', 'classifai' ),
+			'info'
+		);
+
+		// Redirect back to the settings page.
+		wp_safe_redirect( add_query_arg( 'tax', $taxonomy, $this->setting_page_url ) );
+		exit;
+	}
+
+	/**
+	 * Cancel the term cleanup process.
+	 *
+	 * Handler for the `classifai_cancel_term_cleanup` admin-post action.
+	 * Unschedules the background job and redirects back to the settings page.
+	 */
+	public function cancel_term_cleanup_process() {
+		// Check the nonce for security
+		if (
+			empty( $_GET['_wpnonce'] ) ||
+			! wp_verify_nonce( sanitize_text_field( wp_unslash( $_GET['_wpnonce'] ) ), 'classifai_cancel_term_cleanup' )
+		) {
+			wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) );
+		}
+
+		// A nonce proves intent, not authorization. Require the same
+		// capability the Term Cleanup menu page is registered with.
+		if ( ! current_user_can( 'manage_options' ) ) {
+			wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) );
+		}
+
+		$taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : '';
+
+		$unschedule = $this->background_process->unschedule();
+
+		if ( $unschedule ) {
+			// Add a notice to inform the user that the process will be cancelled soon.
+			$this->add_notice(
+				__( 'Process for finding similar terms will be cancelled soon.', 'classifai' ),
+				'info'
+			);
+		}
+
+		// Redirect back to the settings page.
+		wp_safe_redirect( add_query_arg( 'tax', $taxonomy, $this->setting_page_url ) );
+		exit;
+	}
+
+	/**
+	 * Get the max number of terms to process.
+	 *
+	 * @return int
+	 */
+	public function get_max_terms(): int {
+		return 100;
+	}
+
+	/**
+	 * Generate embeddings for the terms.
+	 *
+	 * @param string $taxonomy Taxonomy to process.
+	 * @return bool|WP_Error True if embeddings were generated, false otherwise.
+	 */
+	public function generate_embeddings( string $taxonomy ) {
+		$exclude = [];
+
+		// For the category taxonomy, leave out the term named "Uncategorized".
+		if ( 'category' === $taxonomy ) {
+			// The term is looked up by its name, not by ID or option.
+			$uncat_term = get_term_by( 'name', 'Uncategorized', 'category' );
+			if ( $uncat_term ) {
+				$exclude = [ $uncat_term->term_id ];
+			}
+		}
+
+		// Select up to get_max_terms() term IDs that do not yet have the
+		// embeddings meta key, ordered by usage count (highest first).
+		$meta_key = sanitize_text_field( $this->get_embeddings_meta_key() );
+		$args     = [
+			'taxonomy'     => $taxonomy,
+			'orderby'      => 'count',
+			'order'        => 'DESC',
+			'hide_empty'   => false,
+			'fields'       => 'ids',
+			'meta_key'     => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
+			'meta_compare' => 'NOT EXISTS',
+			'number'       => $this->get_max_terms(),
+			'exclude'      => $exclude, // phpcs:ignore WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude
+		];
+
+		$terms = get_terms( $args );
+
+		// False signals that there was nothing (left) to generate.
+		if ( is_wp_error( $terms ) || empty( $terms ) ) {
+			return false;
+		}
+
+		$provider = $this->get_feature_provider_instance();
+
+		// Generate embedding data for each term.
+		foreach ( $terms as $term_id ) {
+			$result = $provider->generate_embeddings_for_term( $term_id, false, $this );
+
+			/**
+			 * Fires when an embedding is generated for a term.
+			 *
+			 * @since x.x.x
+			 * @hook classifai_feature_term_cleanup_generate_embedding
+			 *
+			 * @param {int}            $term_id ID of term.
+			 * @param {array|WP_Error} $result  Result of embedding generation.
+			 * @param {TermCleanup}    $this    Feature instance.
+			 */
+			do_action( 'classifai_feature_term_cleanup_generate_embedding', $term_id, $result, $this );
+
+			// Stop at the first failure and hand the error back to the caller.
+			if ( is_wp_error( $result ) ) {
+				return $result;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Get similar terms.
+	 *
+	 * @param string $taxonomy Taxonomy to process.
+	 * @param int    $thresold Threshold to consider terms as duplicates.
+	 * @param array  $args     Additional arguments.
+	 * @return array|bool|WP_Error
+	 */
+	public function get_similar_terms( string $taxonomy, int $thresold, array $args = [] ) {
+		// Use Elasticsearch when ElasticPress is loaded and the `use_ep` setting is on.
+		if ( class_exists( '\\ElasticPress\\Feature' ) && '1' === $this->get_settings( 'use_ep' ) ) {
+			return $this->get_similar_terms_using_elasticpress( $taxonomy, $thresold, $args );
+		}
+
+		return $this->get_similar_terms_using_wpdb( $taxonomy, $thresold, $args );
+	}
+
+	/**
+	 * Get similar terms using WPDB.
+	 *
+	 * This method is used to get similar terms using MySQL database.
+	 * This method is slower than using ElasticPress but can be used
+	 * when ElasticPress is not installed or not in use.
+	 *
+	 * Each call handles one batch of comparisons for a single term and
+	 * returns the updated `$args` (`processed`, `term_id`, `offset`) to
+	 * pass to the next call. Matches are stored in the term's
+	 * `classifai_similar_terms` meta.
+	 *
+	 * @param string $taxonomy Taxonomy to process.
+	 * @param int    $thresold Threshold to consider terms as duplicates.
+	 * @param array  $args     Additional arguments.
+	 * @return array|bool Updated arguments, or false when there is no term left to process.
+	 */
+	public function get_similar_terms_using_wpdb( string $taxonomy, int $thresold, array $args = [] ) {
+		global $wpdb;
+
+		$processed = $args['processed'] ?? 0;
+		$term_id   = $args['term_id'] ?? 0;
+		$offset    = $args['offset'] ?? 0;
+		$meta_key  = sanitize_text_field( $this->get_embeddings_meta_key() );
+
+		// No term in progress: pick the next one, skipping those already processed.
+		if ( ! $term_id ) {
+			$params = [
+				'taxonomy'     => $taxonomy,
+				'orderby'      => 'count',
+				'order'        => 'DESC',
+				'hide_empty'   => false,
+				'fields'       => 'ids',
+				'meta_key'     => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
+				'meta_compare' => 'EXISTS',
+				'number'       => 1,
+				'offset'       => $processed,
+			];
+
+			if ( is_taxonomy_hierarchical( $taxonomy ) ) {
+				$params['parent'] = 0;
+			}
+
+			$terms = get_terms( $params );
+
+			if ( is_wp_error( $terms ) || empty( $terms ) ) {
+				return false;
+			}
+
+			$term_id         = $terms[0];
+			$offset          = 0;
+			$args['term_id'] = $term_id;
+			$args['offset']  = $offset;
+		}
+
+		$term_embedding = get_term_meta( $term_id, $meta_key, true );
+
+		// Unwrap an embedding that is stored as a single-element array.
+		if ( is_array( $term_embedding ) && 1 === count( $term_embedding ) ) {
+			$term_embedding = $term_embedding[0];
+		}
+
+		// Without a usable embedding there is nothing to compare; move on
+		// to the next term instead of failing on count()/cosine_similarity().
+		if ( empty( $term_embedding ) || ! is_array( $term_embedding ) ) {
+			$args['processed'] = $processed + 1;
+			$args['term_id']   = 0;
+			$args['offset']    = 0;
+
+			return $args;
+		}
+
+		// Clamp the filtered batch size to at least 1: with a limit of 0 the
+		// offset below would never advance and the term would never finish.
+		$limit = max( 1, absint( apply_filters( 'classifai_term_cleanup_compare_limit', 2000, $taxonomy ) ) );
+
+		// SQL query to retrieve term meta using joins
+		// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching -- Running a custom query to get a batch of term embeddings at a time.
+		$results = $wpdb->get_results(
+			$wpdb->prepare(
+				"SELECT DISTINCT t.term_id, tm.meta_value, tt.count
+				FROM {$wpdb->terms} AS t
+				INNER JOIN {$wpdb->term_taxonomy} AS tt ON t.term_id = tt.term_id
+				INNER JOIN {$wpdb->termmeta} AS tm ON t.term_id = tm.term_id
+				WHERE tt.taxonomy = %s
+				AND tm.meta_key = %s
+				AND t.term_id != %d
+				AND tt.parent = 0
+				ORDER BY tt.count DESC
+				LIMIT %d OFFSET %d",
+				$taxonomy,
+				$meta_key,
+				$term_id,
+				$limit,
+				absint( $offset + $processed ) // Add the processed terms counts to the offset to skip already processed terms.
+			)
+		);
+		$count   = count( $results );
+
+		$calculations  = new EmbeddingCalculations();
+		$similar_terms = [];
+
+		foreach ( $results as $result ) {
+			// wpdb returns column values as strings; compare as integers.
+			$compare_term_id = (int) $result->term_id;
+
+			// Skip if the term is the same as the term we are comparing.
+			if ( (int) $term_id === $compare_term_id ) {
+				continue;
+			}
+
+			$compare_embedding = maybe_unserialize( $result->meta_value );
+
+			// Ignore rows whose meta value is not a usable embedding array.
+			if ( empty( $compare_embedding ) || ! is_array( $compare_embedding ) ) {
+				continue;
+			}
+
+			if ( 1 === count( $compare_embedding ) ) {
+				$compare_embedding = $compare_embedding[0];
+			}
+
+			// The stored score is `1 - $similarity`; a pair qualifies when
+			// that score reaches the threshold percentage.
+			$similarity = $calculations->cosine_similarity( $term_embedding, $compare_embedding );
+			if ( false !== $similarity && ( 1 - $similarity ) >= ( $thresold / 100 ) ) {
+				$similar_terms[ $compare_term_id ] = 1 - $similarity;
+			}
+		}
+
+		if ( ! empty( $similar_terms ) ) {
+			$existing_similar_terms = get_term_meta( $term_id, 'classifai_similar_terms', true );
+
+			// Array union: scores already stored for a term win over new ones.
+			if ( is_array( $existing_similar_terms ) ) {
+				$similar_terms = $existing_similar_terms + $similar_terms;
+			}
+
+			update_term_meta( $term_id, 'classifai_similar_terms', $similar_terms );
+		}
+
+		if ( $count < $limit ) {
+			// Short batch: this term is done, the next call picks a new one.
+			$args['processed'] = $processed + 1;
+			$args['term_id']   = 0;
+			$args['offset']    = 0;
+		} else {
+			// Full batch: keep the same term and continue from the next page.
+			$args['offset'] = $offset + $limit;
+		}
+
+		return $args;
+	}
+
+	/**
+	 * Get similar terms using Elasticsearch via ElasticPress.
+	 *
+	 * Processes up to 10 terms per call and returns the updated `$args`
+	 * to pass to the next call.
+	 *
+	 * @param string $taxonomy Taxonomy to process.
+	 * @param int    $thresold Threshold to consider terms as duplicates.
+	 * @param array  $args     Additional arguments.
+	 * @return array|bool|WP_Error Updated arguments, false when no terms are left, or the search error.
+	 */
+	public function get_similar_terms_using_elasticpress( string $taxonomy, int $thresold, array $args = [] ) {
+		$processed = $args['processed'] ?? 0;
+		$meta_key  = sanitize_text_field( $this->get_embeddings_meta_key() );
+
+		$params = [
+			'taxonomy'     => $taxonomy,
+			'orderby'      => 'count',
+			'order'        => 'DESC',
+			'hide_empty'   => false,
+			'fields'       => 'ids',
+			'meta_key'     => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
+			'meta_compare' => 'EXISTS',
+			'number'       => 10,
+			'offset'       => $processed,
+		];
+
+		if ( is_taxonomy_hierarchical( $taxonomy ) ) {
+			$params['parent'] = 0;
+		}
+
+		$terms = get_terms( $params );
+
+		if ( is_wp_error( $terms ) || empty( $terms ) ) {
+			return false;
+		}
+
+		if ( ! $this->ep_integration ) {
+			$this->ep_integration = new TermCleanupEPIntegration( $this );
+		}
+
+		foreach ( $terms as $term_id ) {
+			// Find similar terms for the term.
+			$search_results = $this->ep_integration->exact_knn_search( $term_id, 'term', 500, $thresold );
+
+			if ( is_wp_error( $search_results ) ) {
+				return $search_results;
+			}
+
+			$similar_terms    = [];
+			$filtered_results = array_filter(
+				$search_results,
+				function ( $result ) use ( $taxonomy ) {
+					return $result['taxonomy'] === $taxonomy;
+				}
+			);
+
+			foreach ( $filtered_results as $result ) {
+				$compare_term_id        = $result['term_id'];
+				$existing_similar_terms = get_term_meta( $compare_term_id, 'classifai_similar_terms', true );
+
+				// Skip if it is already present in the similar terms list of the term we are comparing.
+				if ( ! empty( $existing_similar_terms ) && isset( $existing_similar_terms[ $term_id ] ) ) {
+					continue;
+				}
+
+				$similar_terms[ $compare_term_id ] = $result['score'];
+			}
+
+			if ( ! empty( $similar_terms ) ) {
+				$existing_similar_terms = get_term_meta( $term_id, 'classifai_similar_terms', true );
+
+				if ( is_array( $existing_similar_terms ) ) {
+					$similar_terms = $existing_similar_terms + $similar_terms;
+				}
+
+				update_term_meta( $term_id, 'classifai_similar_terms', $similar_terms );
+			}
+
+			// Count from the local value so a missing `processed` key does
+			// not trigger an undefined index notice.
+			++$processed;
+			$args['processed'] = $processed;
+		}
+
+		$args['term_id'] = 0;
+
+		return $args;
+	}
+
+	/**
+	 * Get the background processing status.
+	 *
+	 * Returns the arguments of the scheduled `term_cleanup` job for the
+	 * given taxonomy, or an empty array when there is none.
+	 *
+	 * @param string $taxonomy Taxonomy to process.
+	 * @return array
+	 */
+	public function get_background_processing_status( string $taxonomy ): array {
+		if ( ! $this->background_process ) {
+			return [];
+		}
+
+		$args = $this->background_process->get_args();
+
+		if ( ! empty( $args ) ) {
+			foreach ( $args as $arg ) {
+				// Tolerate entries that lack the expected keys.
+				if ( 'term_cleanup' === ( $arg['action'] ?? '' ) && $taxonomy === ( $arg['taxonomy'] ?? null ) ) {
+					return $arg;
+				}
+			}
+		}
+
+		return [];
+	}
+
+	/**
+	 * Render the processing status.
+	 *
+	 * @param string $taxonomy Taxonomy to process.
+	 */
+	public function render_background_processing_status( $taxonomy ) {
+		$status = $this->get_background_processing_status( $taxonomy );
+
+		if ( empty( $status ) ) {
+			?>
+

+ + 'classifai_cancel_term_cleanup', + 'taxonomy' => $taxonomy, + ); + $cancel_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_cancel_term_cleanup' ) ); + $label = strtolower( $this->get_taxonomy_label( $taxonomy, true ) ); + ?> + +

+

+ +

+ + +

+ + +

+

+ + setting_page_url ); + $refresh = sprintf( + // translators: %s: Refresh the page link. + esc_html__( '%s to see these results.', 'classifai' ), + '' . esc_html__( 'Refresh the page', 'classifai' ) . '' + ); + echo wp_kses_post( + sprintf( + /* translators: %1$s: Taxonomy name, %d: Number of terms processed */ + __( 'Finding similar %1$s, %2$d %1$s processed. %3$s', 'classifai' ), + esc_html( $label ), + absint( $processed ), + ( absint( $processed ) > 0 ) ? $refresh : '' + ) + ); + ?> +

+ get_embeddings_meta_key() ); + $generated = wp_count_terms( + [ + 'taxonomy' => $taxonomy, + 'hide_empty' => false, + 'meta_key' => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + ] + ); + ?> +

+ + %2$d %1$s processed.', 'classifai' ), + esc_html( $label ), + absint( $generated ) + ) + ); + ?> +

+ + + +
+ + get_taxonomy_label( $taxonomy, true ); + $count = wp_count_terms( + [ + 'taxonomy' => $taxonomy, + 'hide_empty' => false, + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + ] + ); + + if ( $count > 0 ) { + ?> +

+ +

+
+ + + prepare_items(); + $list_table->search_box( esc_html__( 'Search', 'classifai' ), 'search-term' ); + $list_table->display(); + ?> +
+ name ?? __( 'Terms', 'classifai' );
+		} else {
+			$label = $labels->singular_name ?? __( 'Term', 'classifai' );
+		}
+
+		return $label;
+	}
+
+	/**
+	 * Ajax handler for refresh compare status.
+	 *
+	 * Verifies the `classifai-term-cleanup-status` nonce, then sends a JSON
+	 * response holding `is_running` and, while a job is in progress, the
+	 * rendered status markup for the posted taxonomy.
+	 */
+	public function get_term_cleanup_status() {
+		// Check the nonce for security
+		check_ajax_referer( 'classifai-term-cleanup-status', 'nonce' );
+
+		$data = array(
+			'is_running' => false,
+			'status' => '',
+		);
+		$taxonomy = isset( $_POST['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_POST['taxonomy'] ) ) : '';
+
+		if ( empty( $taxonomy ) ) {
+			$data['error'] = __( 'Taxonomy is required', 'classifai' );
+			wp_send_json_error( $data );
+		}
+
+		// The background process may not be set; report "not running" instead of failing.
+		if ( $this->background_process && $this->background_process->in_progress() ) {
+			$data['is_running'] = true;
+			ob_start();
+			$this->render_background_processing_status( $taxonomy );
+			$data['status'] = ob_get_clean();
+		}
+
+		wp_send_json_success( $data );
+	}
+
+	/**
+	 * Merge one term into another.
+	 *
+	 * Reads `taxonomy`, `from` and `to` from the query string, deletes the
+	 * `from` term with `to` as the forced default, queues a notice describing
+	 * the outcome and redirects back to the settings page.
+	 */
+	public function merge_term() {
+		// Check the nonce for security
+		if (
+			empty( $_GET['_wpnonce'] ) ||
+			! wp_verify_nonce( sanitize_text_field( wp_unslash( $_GET['_wpnonce'] ) ), 'classifai_merge_term' )
+		) {
+			wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) );
+		}
+
+		$taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : '';
+		$to = isset( $_GET['to'] ) ? absint( wp_unslash( $_GET['to'] ) ) : 0;
+		$from = isset( $_GET['from'] ) ? absint( wp_unslash( $_GET['from'] ) ) : 0;
+		$to_term = get_term( $to, $taxonomy );
+		$from_term = get_term( $from, $taxonomy );
+		$args = [
+			'tax' => $taxonomy,
+			's' => isset( $_GET['s'] ) ? sanitize_text_field( wp_unslash( $_GET['s'] ) ) : false,
+			'paged' => isset( $_GET['paged'] ) ? absint( wp_unslash( $_GET['paged'] ) ) : false,
+		];
+		$redirect = add_query_arg( $args, $this->setting_page_url );
+
+		if ( empty( $taxonomy ) || empty( $to ) || empty( $from ) ) {
+			$this->add_notice(
+				__( 'Invalid request.', 'classifai' ),
+				'error'
+			);
+
+			// Redirect back to the settings page.
+			wp_safe_redirect( $redirect );
+			exit;
+		}
+
+		if ( $to === $from ) {
+			$this->add_notice(
+				__( 'Cannot merge term with itself.', 'classifai' ),
+				'error'
+			);
+
+			// Redirect back to the settings page.
+			wp_safe_redirect( $redirect );
+			exit;
+		}
+
+		// get_term() gives null or a WP_Error when a term cannot be loaded;
+		// without both terms there is nothing to merge and no names to report.
+		if ( ! is_object( $to_term ) || is_wp_error( $to_term ) || ! is_object( $from_term ) || is_wp_error( $from_term ) ) {
+			$this->add_notice(
+				__( 'Invalid request.', 'classifai' ),
+				'error'
+			);
+
+			// Redirect back to the settings page.
+			wp_safe_redirect( $redirect );
+			exit;
+		}
+
+		/**
+		 * Fires before terms are merged together.
+		 *
+		 * @since x.x.x
+		 * @hook classifai_feature_term_cleanup_pre_merge_term
+		 *
+		 * @param {int} $from Term ID being merged.
+		 * @param {int} $to Term ID we're merging into.
+		 * @param {string} $taxonomy Taxonomy of terms being merged.
+		 */
+		do_action( 'classifai_feature_term_cleanup_pre_merge_term', $from, $to, $taxonomy );
+
+		$ret = wp_delete_term(
+			$from,
+			$taxonomy,
+			array(
+				'default' => $to,
+				'force_default' => true,
+			)
+		);
+
+		/**
+		 * Fires after terms are merged together.
+		 *
+		 * @since x.x.x
+		 * @hook classifai_feature_term_cleanup_post_merge_term
+		 *
+		 * @param {int} $from Term ID being merged.
+		 * @param {int} $to Term ID we're merging into.
+		 * @param {string} $taxonomy Taxonomy of terms being merged.
+		 * @param {bool|int|WP_Error} $ret Result of merge process.
+		 */
+		do_action( 'classifai_feature_term_cleanup_post_merge_term', $from, $to, $taxonomy, $ret );
+
+		// Report exactly one outcome: a failed merge must not also queue the success notice.
+		if ( is_wp_error( $ret ) ) {
+			$this->add_notice(
+				// translators: %s: Error message.
+				sprintf( __( 'Error merging terms: %s.', 'classifai' ), $ret->get_error_message() ),
+				'error'
+			);
+		} elseif ( true !== $ret ) {
+			// wp_delete_term() returns false or 0 when nothing was deleted.
+			$this->add_notice(
+				__( 'Unable to merge terms.', 'classifai' ),
+				'error'
+			);
+		} else {
+			$this->add_notice(
+				// translators: %1$s: From term name, %2$s: To term name.
+				sprintf( __( 'Merged term "%1$s" into "%2$s".', 'classifai' ), $from_term->name, $to_term->name ),
+				'success'
+			);
+		}
+
+		// Redirect back to the settings page.
+ wp_safe_redirect( $redirect ); + exit; + } + + /** + * Skip similar term. + */ + public function skip_similar_term() { + // Check the nonce for security + if ( + empty( $_GET['_wpnonce'] ) || + ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_GET['_wpnonce'] ) ), 'classifai_skip_similar_term' ) + ) { + wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) ); + } + + $taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : ''; + $term = isset( $_GET['term'] ) ? absint( wp_unslash( $_GET['term'] ) ) : 0; + $similar_term = isset( $_GET['similar_term'] ) ? absint( wp_unslash( $_GET['similar_term'] ) ) : 0; + $args = [ + 'tax' => $taxonomy, + 's' => isset( $_GET['s'] ) ? sanitize_text_field( wp_unslash( $_GET['s'] ) ) : false, + 'paged' => isset( $_GET['paged'] ) ? absint( wp_unslash( $_GET['paged'] ) ) : false, + ]; + $redirect = add_query_arg( $args, $this->setting_page_url ); + + /** + * Fires before a term is skipped. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_pre_skip_term + * + * @param {int} $term Term ID being skipped. + * @param {int} $similar_term Term ID that matched. + * @param {string} $taxonomy Taxonomy of terms being merged. + */ + do_action( 'classifai_feature_term_cleanup_pre_skip_term', $term, $similar_term, $taxonomy ); + + // SKip/Ignore the similar term. + $term_meta = get_term_meta( $term, 'classifai_similar_terms', true ); + if ( is_array( $term_meta ) && isset( $term_meta[ $similar_term ] ) ) { + unset( $term_meta[ $similar_term ] ); + if ( empty( $term_meta ) ) { + delete_term_meta( $term, 'classifai_similar_terms' ); + } else { + update_term_meta( $term, 'classifai_similar_terms', $term_meta ); + } + } + + /** + * Fires after a term is skipped. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_post_skip_term + * + * @param {int} $term Term ID being skipped. + * @param {int} $similar_term Term ID that matched. 
+ * @param {string} $taxonomy Taxonomy of terms being merged. + */ + do_action( 'classifai_feature_term_cleanup_post_skip_term', $term, $similar_term, $taxonomy ); + + $this->add_notice( + esc_html__( 'Skipped similar term.', 'classifai' ), + 'success' + ); + + // Redirect back to the settings page. + wp_safe_redirect( $redirect ); + exit; + } + + /** + * Add a notice to be displayed. + * + * @param string $message Message to display. + * @param string $type Type of notice. + */ + public function add_notice( $message, $type = 'success' ) { + $notices = get_transient( $this->notices_transient_key ); + + if ( ! is_array( $notices ) ) { + $notices = []; + } + + $notices[] = array( + 'message' => $message, + 'type' => $type, + ); + + set_transient( $this->notices_transient_key, $notices, 300 ); + } + + /** + * Render notices. + */ + public function render_notices() { + $notices = get_transient( $this->notices_transient_key ); + + if ( ! empty( $notices ) ) { + foreach ( $notices as $notice ) { + ?> +
+

+ +

+
+ notices_transient_key );
+		}
+	}
+}
diff --git a/includes/Classifai/Features/TermCleanupEPIntegration.php b/includes/Classifai/Features/TermCleanupEPIntegration.php
new file mode 100644
index 000000000..016441326
--- /dev/null
+++ b/includes/Classifai/Features/TermCleanupEPIntegration.php
@@ -0,0 +1,252 @@
+term_cleanup = $feature;
+		$this->es_version = Elasticsearch::factory()->get_elasticsearch_version();
+	}
+
+	/**
+	 * Initialize the class and register the needed hooks.
+	 *
+	 * Hooks are only registered when the detected Elasticsearch version is
+	 * at least 7.0.
+	 */
+	public function init() {
+		// Vector support was added in Elasticsearch 7.0, so only older versions are skipped.
+		if ( version_compare( $this->es_version, '7.0', '<' ) ) {
+			return;
+		}
+
+		add_filter( 'ep_term_mapping', [ $this, 'add_term_vector_field_mapping' ] );
+		add_filter( 'ep_prepare_term_meta_excluded_public_keys', [ $this, 'exclude_vector_meta' ] );
+		add_filter( 'ep_term_sync_args', [ $this, 'add_vector_field_to_term_sync' ], 10, 2 );
+	}
+
+	/**
+	 * Add our vector field mapping to the Elasticsearch term index.
+	 *
+	 * @param array $mapping Current mapping.
+	 * @param int $dimensions Number of dimensions for the vector field. Default 512.
+	 * @param bool $quantization Whether to use quantization for the vector field. Default true.
+	 * @return array
+	 */
+	public function add_term_vector_field_mapping( array $mapping, int $dimensions = 512, bool $quantization = true ): array {
+		// Don't add the field if it already exists.
+		if ( isset( $mapping['mappings']['properties']['chunks'] ) ) {
+			return $mapping;
+		}
+
+		// Add the default vector field mapping.
+		$mapping['mappings']['properties']['chunks'] = [
+			'type' => 'nested',
+			'properties' => [
+				'vector' => [
+					'type' => 'dense_vector',
+					'dims' => (int) $dimensions, // This needs to match the dimensions your model uses.
+				],
+			],
+		];
+
+		// Add extra vector fields for newer versions of Elasticsearch.
+ if ( version_compare( $this->es_version, '8.0', '>=' ) ) { + // The index (true or false, default true) and similarity (l2_norm, dot_product or cosine) fields + // were added in 8.0. The similarity field must be set if index is true. + $mapping['mappings']['properties']['chunks']['properties']['vector'] = array_merge( + $mapping['mappings']['properties']['chunks']['properties']['vector'], + [ + 'index' => true, + 'similarity' => 'cosine', + ] + ); + + // The element_type field was added in 8.6. This can be either float (default) or byte. + if ( version_compare( $this->es_version, '8.6', '>=' ) ) { + $mapping['mappings']['properties']['chunks']['properties']['vector']['element_type'] = 'float'; + } + + // The int8_hnsw type was added in 8.12. + if ( $quantization && version_compare( $this->es_version, '8.12', '>=' ) ) { + // This is supposed to result in better performance but slightly less accurate results. + // See https://www.elastic.co/guide/en/elasticsearch/reference/8.13/knn-search.html#knn-search-quantized-example. + // Can test with this on and off and compare results to see what works best. + $mapping['mappings']['properties']['chunks']['properties']['vector']['index_options']['type'] = 'int8_hnsw'; + } + } + + return $mapping; + } + + /** + * Exclude our vector meta from being synced. + * + * @param array $excluded_keys Current excluded keys. + * @return array + */ + public function exclude_vector_meta( array $excluded_keys ): array { + $excluded_keys[] = $this->term_cleanup->get_embeddings_meta_key(); + + return $excluded_keys; + } + + /** + * Add the embedding data to the term vector sync args. + * + * @param array $args Current sync args. + * @param int $term_id Term ID being synced. + * @return array + */ + public function add_vector_field_to_term_sync( array $args, int $term_id ): array { + // Try to use the stored embeddings first. 
+ $meta_key = $this->term_cleanup->get_embeddings_meta_key(); + $embeddings = get_term_meta( $term_id, $meta_key, true ); + + // If they don't exist, make API requests to generate them. + if ( ! $embeddings ) { + $provider = $this->term_cleanup->get_feature_provider_instance(); + $embeddings = $provider->generate_embeddings_for_term( $term_id, false, $this->term_cleanup ); + } + + // If we still don't have embeddings, return early. + if ( ! $embeddings || empty( $embeddings ) ) { + return $args; + } + + // Add the embeddings data to the sync args. + $args['chunks'] = []; + + foreach ( $embeddings as $embedding ) { + $args['chunks'][] = [ + 'vector' => array_map( 'floatval', $embedding ), + ]; + } + + return $args; + } + + /** + * Add the score field to the document. + * + * @param array $document Document retrieved from Elasticsearch. + * @param array $hit Raw Elasticsearch hit. + * @return array + */ + public function add_score_field_to_document( array $document, array $hit ): array { + // Add the score to the document if it exists. + if ( isset( $hit['_score'] ) ) { + $document['score'] = $hit['_score']; + } + + return $document; + } + + /** + * Run an exact k-nearest neighbor (kNN) search. + * + * @param int $term_id Term ID to search for. + * @param string $index Indexable to run the query against. Default term. + * @param int $num Number of items to return. + * @param int $threshold Threshold for the minimum score. 
+ * @return array|WP_Error + */ + public function exact_knn_search( int $term_id, string $index = 'term', int $num = 1000, $threshold = 75 ) { + $provider = $this->term_cleanup->get_feature_provider_instance(); + $query_embedding = $provider->generate_embeddings_for_term( $term_id, false, $this->term_cleanup ); + $min_score = 1 + ( $threshold / 100 ); + + if ( is_wp_error( $query_embedding ) ) { + return $query_embedding; + } + + if ( is_array( $query_embedding ) ) { + $query_embedding = $query_embedding[0]; + } + + // Get the ElasticPress indexable. + $indexable = Indexables::factory()->get( $index ); + + if ( ! $indexable ) { + return new WP_Error( 'invalid_index', esc_html__( 'Invalid indexable provided.', 'classifai' ) ); + } + + // Build our exact kNN query. + $knn_query = [ + 'from' => 0, + 'size' => (int) $num, + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'nested' => [ + 'path' => 'chunks', + 'query' => [ + 'script_score' => [ + 'query' => [ + 'match_all' => (object) [], + ], + 'script' => [ + 'source' => 'cosineSimilarity(params.query_vector, "chunks.vector") + 1.0', + 'params' => [ + 'query_vector' => array_map( 'floatval', $query_embedding ), + ], + ], + ], + ], + ], + ], + ], + 'must_not' => [ + [ + 'term' => [ + 'term_id' => $term_id, + ], + ], + ], + ], + ], + '_source' => [ 'term_id', 'score', 'taxonomy' ], + 'min_score' => $min_score, + ]; + + // Add the score field to the document. + add_filter( 'ep_retrieve_the_term', [ $this, 'add_score_field_to_document' ], 10, 2 ); + + // Run the query using the ElasticPress indexable. + $res = $indexable->query_es( $knn_query, [] ); + + if ( false === $res || ! 
isset( $res['documents'] ) ) {
+			return new WP_Error( 'es_error', esc_html__( 'Unable to query Elasticsearch', 'classifai' ) );
+		}
+
+		return $res['documents'];
+	}
+}
diff --git a/includes/Classifai/Plugin.php b/includes/Classifai/Plugin.php
index 3d61af738..78025bd6f 100644
--- a/includes/Classifai/Plugin.php
+++ b/includes/Classifai/Plugin.php
@@ -252,18 +252,22 @@ public function filter_plugin_action_links( $links ): array {
 	 * Load the Action Scheduler library.
 	 */
 	public function load_action_scheduler() {
-		$feature = new \Classifai\Features\Classification();
+		// Key each feature by the option name its settings are posted under, so each one is checked against its own POST data.
+		// NOTE(review): the Term Cleanup key is assumed to follow the `classifai_<feature id>` pattern used by Classification; confirm.
+		$features = [ 'classifai_feature_classification' => new \Classifai\Features\Classification(), 'classifai_feature_term_cleanup' => new \Classifai\Features\TermCleanup() ];
 		$is_feature_being_enabled = false;
 
-		if ( isset( $_POST['classifai_feature_classification'] ) ) { // phpcs:ignore WordPress.Security.NonceVerification.Missing
-			$is_feature_being_enabled = sanitize_text_field( wp_unslash( $_POST['classifai_feature_classification']['status'] ?? false ) ); // phpcs:ignore WordPress.Security.NonceVerification.Missing
-		}
+		foreach ( $features as $option_name => $feature ) {
+			if ( isset( $_POST[ $option_name ] ) ) { // phpcs:ignore WordPress.Security.NonceVerification.Missing
+				$is_feature_being_enabled = sanitize_text_field( wp_unslash( $_POST[ $option_name ]['status'] ?? false ) ); // phpcs:ignore WordPress.Security.NonceVerification.Missing
+			}
 
-		if ( ! ( $feature->is_enabled() || '1' === $is_feature_being_enabled ) ) {
-			return;
-		}
+			if ( ! ( $feature->is_enabled() || '1' === $is_feature_being_enabled ) ) {
+				continue;
+			}
 
-		require_once CLASSIFAI_PLUGIN_DIR . '/vendor/woocommerce/action-scheduler/action-scheduler.php';
+			require_once CLASSIFAI_PLUGIN_DIR .
'/vendor/woocommerce/action-scheduler/action-scheduler.php';
+		}
 	}
 
 	/**
diff --git a/includes/Classifai/Services/ServicesManager.php b/includes/Classifai/Services/ServicesManager.php
index cdf711089..9c76503ae 100644
--- a/includes/Classifai/Services/ServicesManager.php
+++ b/includes/Classifai/Services/ServicesManager.php
@@ -81,6 +81,7 @@ public function register_language_processing_features( array $features ): array
 			'\Classifai\Features\AudioTranscriptsGeneration',
 			'\Classifai\Features\Moderation',
 			'\Classifai\Features\Smart404',
+			'\Classifai\Features\TermCleanup',
 		];
 
 		foreach ( $core_features as $feature ) {
diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php
new file mode 100644
index 000000000..618dad28e
--- /dev/null
+++ b/includes/Classifai/TermCleanupScheduler.php
@@ -0,0 +1,256 @@
+job_name = $job_name;
+	}
+
+	/**
+	 * Initialize the class.
+	 */
+	public function init() {
+		add_action( $this->job_name, [ $this, 'run' ] );
+	}
+
+	/**
+	 * Run the term cleanup job.
+	 *
+	 * Handles one batch per invocation and re-schedules itself while there is
+	 * more work. The current user is switched to the user who started the job
+	 * for the duration of the batch and restored on every exit path.
+	 *
+	 * @param array $item Item details to process.
+	 */
+	public function run( array $item = [] ) {
+		$action = $item['action'] ?? '';
+
+		if ( ! $action ) {
+			return;
+		}
+
+		switch ( $action ) {
+			case 'term_cleanup':
+				$started_by = absint( $item['started_by'] );
+				$taxonomy = $item['taxonomy'];
+				$thresold = $item['thresold'];
+				$term_cleanup = new TermCleanup();
+				$embeddings_generated = (bool) $item['embeddings_generated'];
+
+				$original_user_id = get_current_user_id();
+
+				// Set the user to the one who started the process, to avoid permission issues.
+				wp_set_current_user( (int) $started_by );
+
+				// Check if cancel request is made.
+				if ( isset( $item['job_id'] ) && get_transient( 'classifai_cancel_term_cleanup_process' ) === $item['job_id'] ) {
+					delete_transient( 'classifai_cancel_term_cleanup_process' );
+					wp_set_current_user( $original_user_id ); // Don't leave the request running as the job's user.
+					return;
+				}
+
+				// Generate embeddings if not already generated.
+				if ( ! $embeddings_generated ) {
+					$results = $term_cleanup->generate_embeddings( $taxonomy );
+
+					if ( is_wp_error( $results ) ) {
+						$term_cleanup->add_notice(
+							// translators: %s: error message.
+							sprintf( esc_html__( 'Error in generating embeddings: %s', 'classifai' ), $results->get_error_message() ),
+							'error'
+						);
+
+						wp_set_current_user( $original_user_id ); // Restore original user.
+						return;
+					}
+
+					// If we get false, then there are no further terms to process.
+					if ( false === $results ) {
+						$item['embeddings_generated'] = true;
+						$this->schedule( [ $item ] );
+						wp_set_current_user( $original_user_id ); // Restore original user.
+						return;
+					}
+
+					$this->schedule( [ $item ] );
+					wp_set_current_user( $original_user_id ); // Restore original user.
+					return;
+				}
+
+				// Find similar terms.
+				$args = array(
+					'processed' => $item['processed'] ?? 0,
+					'term_id' => $item['term_id'] ?? 0,
+					'offset' => $item['offset'] ?? 0,
+				);
+				$res = $term_cleanup->get_similar_terms( $taxonomy, $thresold, $args );
+
+				/**
+				 * Fires when a batch of similar terms are calculated.
+				 *
+				 * @since x.x.x
+				 * @hook classifai_feature_term_cleanup_get_similar_terms
+				 *
+				 * @param {array|bool|WP_Error} $res Response from the get_similar_terms method.
+				 * @param {string} $taxonomy Taxonomy of terms we are comparing.
+				 * @param {array} $args Arguments used for getting similar terms.
+				 */
+				do_action( 'classifai_feature_term_cleanup_get_similar_terms', $res, $taxonomy, $args );
+
+				// Restore original user.
+				wp_set_current_user( $original_user_id );
+
+				if ( is_wp_error( $res ) ) {
+					$term_cleanup->add_notice(
+						// translators: %s: error message.
+						sprintf( esc_html__( 'Error in finding similar terms: %s', 'classifai' ), $res->get_error_message() ),
+						'error'
+					);
+
+					return;
+				}
+
+				if ( false === $res ) {
+					$label = strtolower( $term_cleanup->get_taxonomy_label( $taxonomy, true ) );
+
+					// Show notice to user.
+					$term_cleanup->add_notice(
+						// translators: %s: taxonomy label.
+						sprintf( __( 'Process for finding similar %s has been completed.', 'classifai' ), $label ),
+						'success'
+					);
+
+					// No more terms to process.
+					return;
+				}
+
+				// Update item.
+ $item['processed'] = $res['processed']; + $item['term_id'] = $res['term_id']; + $item['offset'] = $res['offset']; + + $this->schedule( [ $item ] ); + return; + default: + return; + } + } + + /** + * Schedule the term cleanup job. + * + * @param array $args Arguments to pass to the job. + */ + public function schedule( array $args = [] ) { + if ( function_exists( 'as_enqueue_async_action' ) ) { + as_enqueue_async_action( $this->job_name, $args ); + } + } + + /** + * Unschedule the term cleanup job. + * + * @return bool + */ + public function unschedule() { + if ( function_exists( 'as_unschedule_all_actions' ) ) { + as_unschedule_all_actions( $this->job_name ); + + if ( ! class_exists( 'ActionScheduler_Store' ) ) { + return false; + } + + $store = ActionScheduler_Store::instance(); + + // Check if the job is still in progress. + $action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_RUNNING, + ) + ); + + // If no action running, return true. + if ( empty( $action_id ) ) { + return true; + } + + $action = $store->fetch_action( $action_id ); + $args = $action->get_args(); + if ( ! empty( $args ) && isset( $args[0]['job_id'] ) ) { + set_transient( 'classifai_cancel_term_cleanup_process', $args[0]['job_id'], 300 ); + } + + return true; + } + + return false; + } + + /** + * Check if job is in progress. + * + * @return bool + */ + public function in_progress(): bool { + if ( function_exists( 'as_has_scheduled_action' ) ) { + return as_has_scheduled_action( $this->job_name ); + } + + return false; + } + + /** + * Get the arguments for the current job. + * + * @return array|bool + */ + public function get_args() { + if ( ! 
class_exists( 'ActionScheduler_Store' ) ) { + return false; + } + + $store = ActionScheduler_Store::instance(); + + $running_action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_RUNNING, + ) + ); + + $pending_action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_PENDING, + ) + ); + + if ( empty( $running_action_id ) && empty( $pending_action_id ) ) { + return false; + } + + $action_id = ! empty( $running_action_id ) ? $running_action_id : $pending_action_id; + $action = $store->fetch_action( $action_id ); + $args = $action->get_args(); + + return $args; + } +} diff --git a/readme.txt b/readme.txt index 08d276c1c..02a801122 100644 --- a/readme.txt +++ b/readme.txt @@ -26,6 +26,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) * Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with 
[ElasticPress](https://github.com/10up/ElasticPress) +* Find similar terms to merge together using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress). Note this only compares top-level terms and if you merge a term that has children, these become top-level terms as per default WordPress behavior * BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) _(note that this service has been deprecated by Microsoft and as such, will no longer work. We are looking to replace this with a new provider to maintain the same functionality)_ * Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) * Scan images and PDF files for embedded text and save for use in post meta using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) @@ -39,7 +40,8 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUNTZBNzRKNlVQSFhZMU9aV09EVzYxWFdORCQlQCN0PWcu) for OpenAI access. * To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account. * To utilize the AWS Language Processing functionality, you will need an active [AWS](https://console.aws.amazon.com/) account. 
-* To utilize the Smart 404 feature, you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. +* To utilize the Smart 404 feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access and you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. +* To utilize the Term Cleanup feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access. For better performance, you will need [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. == Upgrade Notice == diff --git a/src/js/admin.js b/src/js/admin.js index 9817bc1bf..ab1b69d89 100644 --- a/src/js/admin.js +++ b/src/js/admin.js @@ -444,3 +444,50 @@ document.addEventListener( 'DOMContentLoaded', function () { } } ); } )( jQuery ); + +// Update the Term Cleanup status. +( function ( $ ) { + const statusWrapper = $( '.classifai-term-cleanup-process-status' ); + const processRunning = statusWrapper.length; + const taxonomy = statusWrapper.data( 'taxonomy' ); + + if ( ! processRunning || ! 
taxonomy ) { + return; + } + + const ajaxUrl = classifai_term_cleanup_params?.ajax_url; // eslint-disable-line camelcase + const ajaxNonce = classifai_term_cleanup_params?.ajax_nonce; // eslint-disable-line camelcase + + const intervalId = setInterval( function () { + $.ajax( { + url: ajaxUrl, + type: 'POST', + data: { + action: 'classifai_get_term_cleanup_status', + taxonomy, + nonce: ajaxNonce, + }, + success( response ) { + if ( response.success && response.data ) { + if ( response.data.is_running && response.data.status ) { + // Update the sync status on the page + statusWrapper.html( response.data.status ); + } else { + // Clear interval and reload the page. + clearInterval( intervalId ); + window.location.reload(); + } + } + }, + error( jqXHR, textStatus, errorThrown ) { + // eslint-disable-next-line no-console + console.error( + 'Error: ', + textStatus, + ', Details: ', + errorThrown + ); + }, + } ); + }, 30000 ); // 30000 milliseconds = 30 seconds +} )( jQuery ); diff --git a/src/js/settings/components/feature-additional-settings/index.js b/src/js/settings/components/feature-additional-settings/index.js index fd61ae513..99644db2e 100644 --- a/src/js/settings/components/feature-additional-settings/index.js +++ b/src/js/settings/components/feature-additional-settings/index.js @@ -19,6 +19,7 @@ import { ClassificationSettings } from './classification'; import { ModerationSettings } from './moderation'; import { Smart404Settings } from './smart-404'; import { RecommendedContentSettings } from './recommended-content'; +import { TermCleanupSettings } from './term-cleanup'; /** * Component for additional settings fields for individual features. 
@@ -62,6 +63,9 @@ const AdditionalSettingsFields = () => { case 'feature_recommended_content': return ; + case 'feature_term_cleanup': + return ; + default: return null; } diff --git a/src/js/settings/components/feature-additional-settings/term-cleanup.js b/src/js/settings/components/feature-additional-settings/term-cleanup.js new file mode 100644 index 000000000..122473c5d --- /dev/null +++ b/src/js/settings/components/feature-additional-settings/term-cleanup.js @@ -0,0 +1,146 @@ +/** + * WordPress dependencies + */ +import { useSelect, useDispatch } from '@wordpress/data'; +import { + CheckboxControl, + __experimentalInputControl as InputControl, // eslint-disable-line @wordpress/no-unsafe-wp-apis +} from '@wordpress/components'; +import { __ } from '@wordpress/i18n'; + +/** + * Internal dependencies + */ +import { SettingsRow } from '../settings-row'; +import { STORE_NAME } from '../../data/store'; +import { useFeatureContext } from '../feature-settings/context'; +import { getFeature } from '../../utils/utils'; + +/** + * Component for Term Cleanup feature settings. + * + * This component is used within the FeatureSettings component + * to allow users to configure the Term Cleanup feature. + * + * @return {React.ReactElement} TermCleanupSettings component. 
+ */ +export const TermCleanupSettings = () => { + const { featureName } = useFeatureContext(); + const featureSettings = useSelect( ( select ) => + select( STORE_NAME ).getFeatureSettings() + ); + const { setFeatureSettings } = useDispatch( STORE_NAME ); + const { taxonomies = {} } = getFeature( featureName ); + + const options = Object.keys( taxonomies ).map( ( slug ) => { + return { + value: slug, + label: taxonomies[ slug ], + }; + } ); + const features = {}; + + options?.forEach( ( taxonomy ) => { + features[ taxonomy.value ] = { + label: taxonomy.label, + defaultThreshold: 75, + }; + } ); + + const Description = () => { + if ( window.classifAISettings?.isEPinstalled ) { + return __( + 'Use Elasticsearch for finding similar terms; this will speed up the process for finding similar terms.', + 'classifai' + ); + } + + return ( + <> + { __( 'Install and activate the ', 'classifai' ) } + + { __( 'ElasticPress', 'classifai' ) } + + { __( + ' plugin to use Elasticsearch for finding similar terms.', + 'classifai' + ) } + + ); + }; + + return ( + <> + } + className="settings-term-cleanup-use-ep" + > + { + setFeatureSettings( { + use_ep: value ? '1' : '0', + } ); + } } + /> + + <> + { Object.keys( features ).map( ( feature ) => { + const { defaultThreshold, label } = features[ feature ]; + return ( + + { + setFeatureSettings( { + taxonomies: { + ...featureSettings.taxonomies, + [ feature ]: value ? 
1 : 0, + }, + } ); + } } + /> + { + setFeatureSettings( { + taxonomies: { + ...featureSettings.taxonomies, + [ `${ feature }_threshold` ]: value, + }, + } ); + } } + /> + + ); + } ) } + + + ); +}; diff --git a/src/scss/admin.scss b/src/scss/admin.scss index b9ad1e95f..ce378111a 100644 --- a/src/scss/admin.scss +++ b/src/scss/admin.scss @@ -389,7 +389,7 @@ input.classifai-button { border: none; margin-left: 0; background: transparent; - padding: 10px 12px; + padding: 9px 12px; position: relative; &:after { @@ -916,3 +916,154 @@ div.classifai-openai__result-disable-link { display: block; padding: 0 1em 1.5em 1em; } + +.classifai-content .classifai-term-cleanup { + margin-top: 20px; +} + +.classifai-term-cleanup .classifai-tabs.tabs-center { + margin-bottom: 24px +} + +.classifai-term-cleanup .classifai-tabs.tabs-justify { + table-layout: fixed; + width: 100% +} + +.classifai-term-cleanup .classifai-tabs a.tab { + color: #1d2327; + cursor: pointer; + display: block; + font-size: 14px; + padding: 16px 12px; + position: relative; + text-decoration: none; + transform: translateZ(0); + transition: all .3s ease; + margin-bottom: 4px; + background: #f9f9f9; +} + +.classifai-term-cleanup .classifai-tabs a.tab:focus { + box-shadow: none +} + +.classifai-term-cleanup .classifai-tabs a.tab:hover { + color: var(--classifai-admin-theme-color) +} + +.classifai-term-cleanup .classifai-tabs a.tab.active { + background: #f0f0f0; + border-radius: 4px; + box-shadow: none; + font-weight: 600 +} + +.classifai-term-cleanup .classifai-tabs a.tab.active:after { + opacity: 1; + transform: scale(1) +} + +.classifai-term-cleanup .classifai-term-cleanup-wrapper { + display: flex; + flex-direction: row; + flex-wrap: wrap; + margin-top: 20px; +} + + +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper .classifai-term-cleanup-content-wrapper-field-label { + text-align: left +} + +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper 
.classifai-term-cleanup-content-wrapper-field { + padding: 0 +} + +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper .classifai-term-cleanup-content-wrapper-field-label>label { + display: block; + font-weight: 700; + margin-bottom: 0; + text-transform: uppercase +} + +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper input[type=password], +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper input[type=text] { + font-size: 14px; + height: 38px; + margin-bottom: 4px; + width: 100% +} + +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper .classifai-setup-footer { + margin-top: 40px +} + +@media screen and (max-width: 782px) { + .classifai-term-cleanup .classifai-term-cleanup-content-wrapper { + padding-left: 18px + } +} + +@media screen and (max-width: 600px) { + .classifai-term-cleanup .classifai-term-cleanup-content-wrapper { + margin-bottom: 20px; + padding-left: 0; + width: 100% + } +} + +.classifai-term-cleanup-process-status p{ + font-size: 14px; +} + +.classifai-term-cleanup-process-status .dashicons-yes-alt{ + color: #48be1e; +} + +.classifai-term-cleanup-process-status .button-link-delete { + color: #cc0000; +} + +.classifai-term-cleanup .classifai-term-cleanup-content-wrapper input[type=text].current-page{ + width: auto; + height: auto; +} + +table.similar_terms { + border-collapse: collapse; + border: 2px solid #c3c4c7; +} + +table.similar_terms tbody tr.border { + border-left: 2px solid #c3c4c7; + border-right: 2px solid #c3c4c7; + border-top: 2px solid #c3c4c7; +} + +table.similar_terms tbody tr.border.skip { + border-top: 0px; +} + +table.similar_terms tbody tr.border:last-child { + border-bottom: 2px solid #c3c4c7; +} + +table.similar_terms th#actions { + width: 15%; +} + +table.similar_terms .term-merge-button { + margin-top: 16px; + margin-bottom: 8px; +} + +table.similar_terms.widefat td, +table.similar_terms.widefat th { + padding: 14px; + font-size: 14px; +} + +table.similar_terms.widefat thead 
th {
+	font-weight: bold;
+}
diff --git a/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js b/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js
new file mode 100644
index 000000000..1fe5170b1
--- /dev/null
+++ b/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js
@@ -0,0 +1,64 @@
+/**
+ * End-to-end checks for the Term Cleanup feature settings when the
+ * Azure OpenAI Embeddings provider is selected.
+ */
+describe( '[Language processing] Term Cleanup - Azure OpenAI Tests', () => {
+	before( () => {
+		cy.login();
+		cy.optInAllFeatures();
+	} );
+
+	beforeEach( () => {
+		cy.login();
+	} );
+
+	it( "ElasticPress option is disabled if the plugin isn't active", () => {
+		cy.disableElasticPress();
+
+		cy.visitFeatureSettings( 'language_processing/feature_term_cleanup' );
+
+		// The checkbox is expected to be present but not toggleable.
+		cy.get( '#use_ep' ).should( 'be.disabled' );
+	} );
+
+	it( 'Can save Term Cleanup settings', () => {
+		cy.enableElasticPress();
+
+		cy.visitFeatureSettings( 'language_processing/feature_term_cleanup' );
+
+		// Enable Feature.
+		cy.enableFeature();
+
+		// Setup Provider.
+		cy.selectProvider( 'azure_openai_embeddings' );
+		cy.get( 'input#azure_openai_embeddings_endpoint_url' )
+			.clear()
+			.type( 'https://e2e-test-azure-openai.test/' );
+		cy.get( 'input#azure_openai_embeddings_api_key' )
+			.clear()
+			.type( 'password' );
+		cy.get( 'input#azure_openai_embeddings_deployment' )
+			.clear()
+			.type( 'test' );
+
+		// Change all settings.
+		cy.get( '#use_ep' ).check();
+		cy.get( '#category-enabled' ).uncheck();
+		cy.get( '#category-threshold' ).clear().type( 80 );
+		cy.get( '#post_tag-enabled' ).check();
+		cy.get( '#post_tag-threshold' ).clear().type( 80 );
+
+		// Save settings.
+		cy.saveFeatureSettings();
+
+		// Ensure settings page now exists.
+		cy.visit(
+			'/wp-admin/tools.php?page=classifai-term-cleanup&tax=post_tag'
+		);
+
+		cy.get( '.classifai-wrapper .submit-wrapper' ).should( 'exist' );
+
+		// Undo the enableElasticPress() call made at the start of this test.
+		cy.disableElasticPress();
+	} );
+} );
diff --git a/tests/cypress/integration/language-processing/term-cleanup-openai.test.js b/tests/cypress/integration/language-processing/term-cleanup-openai.test.js
new file mode 100644
index 000000000..5b868a33f
--- /dev/null
+++ b/tests/cypress/integration/language-processing/term-cleanup-openai.test.js
@@ -0,0 +1,55 @@
+/**
+ * End-to-end checks for the Term Cleanup feature settings when the
+ * OpenAI Embeddings provider is selected.
+ */
+describe( '[Language processing] Term Cleanup - OpenAI Tests', () => {
+	before( () => {
+		cy.login();
+		cy.optInAllFeatures();
+	} );
+
+	beforeEach( () => {
+		cy.login();
+	} );
+
+	it( "ElasticPress option is disabled if the plugin isn't active", () => {
+		cy.disableElasticPress();
+
+		cy.visitFeatureSettings( 'language_processing/feature_term_cleanup' );
+
+		// The checkbox is expected to be present but not toggleable.
+		cy.get( '#use_ep' ).should( 'be.disabled' );
+	} );
+
+	it( 'Can save Term Cleanup settings', () => {
+		cy.enableElasticPress();
+
+		cy.visitFeatureSettings( 'language_processing/feature_term_cleanup' );
+
+		// Enable Feature.
+		cy.enableFeature();
+
+		// Setup Provider.
+		cy.selectProvider( 'openai_embeddings' );
+		cy.get( '#openai_api_key' ).clear().type( 'password' );
+
+		// Change all settings.
+		cy.get( '#category-enabled' ).uncheck();
+		cy.get( '#category-threshold' ).clear().type( 80 );
+		cy.get( '#post_tag-enabled' ).check();
+		cy.get( '#post_tag-threshold' ).clear().type( 80 );
+
+		// Save settings.
+		cy.saveFeatureSettings();
+
+		// Ensure settings page now exists.
+		cy.visit(
+			'/wp-admin/tools.php?page=classifai-term-cleanup&tax=post_tag'
+		);
+
+		cy.get( '.classifai-wrapper .submit-wrapper' ).should( 'exist' );
+
+		// Undo the enableElasticPress() call made at the start of this test.
+		cy.disableElasticPress();
+	} );
+} );