Make runJobFlowRequest synchronous #208

Open · wants to merge 5 commits into base: master
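In short, summarizing the diff below: runTaskInNewCluster no longer returns right after emr.runJobFlow(request); it now polls the new EMR cluster until it is ready, and if the cluster fails to start it emails the job creator (whose address is now threaded through getTask and the DataPullTask constructor) and throws. A minimal sketch of the caller-visible behavior, with hypothetical surrounding code (names taken from the diff below):

    // Hypothetical caller-side view of the new synchronous behavior.
    DataPullTask task = config.getTask(jobName, creator, fileS3Path, jksFilePath, subnets, stepPipelineMap);
    try {
        task.run();                      // now blocks until the EMR cluster is WAITING/RUNNING
    } catch (RuntimeException e) {
        // cluster never became ready; the creator has already been emailed by sendEmail(...)
    }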
12 changes: 12 additions & 0 deletions api/pom.xml
@@ -110,6 +110,12 @@
<version>2.7.0</version>
<scope>compile</scope>
</dependency>
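<!-- Assumption: scala-library is needed at runtime by the Scala classes (config.AppConfig, core.Controller) that the DataMigrationFramework dependency below brings in -->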
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.12.0</version>
</dependency>

<dependency>
<groupId>org.everit.json</groupId>
<artifactId>org.everit.json.schema</artifactId>
@@ -131,6 +137,12 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>DataMigrationTool</groupId>
<artifactId>DataMigrationFramework</artifactId>
<version>1.0-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>

<build>
@@ -52,8 +52,8 @@ public class DataPullClientConfig {

@Bean
@Scope("prototype")
public DataPullTask getTask(String taskId, String json, String jksFile, List<String> subnets, Map<String, List<DescribeStepRequest>> stepPipelineMap) {
return new DataPullTask(taskId, json, jksFile, subnets, stepPipelineMap);
public DataPullTask getTask(String taskId, String creator, String json, String jksFile, List<String> subnets, Map<String, List<DescribeStepRequest>> stepPipelineMap) {
return new DataPullTask(taskId, creator, json, jksFile, subnets, stepPipelineMap);
}

@Bean
@@ -351,7 +351,7 @@ private Boolean createBootstrapScript(Migration[] myObjects, String bootstrapFil

private DataPullTask createDataPullTask(String fileS3Path, String jksFilePath, ClusterProperties properties, String jobName, String creator, String customJarFilePath, Boolean haveBootstrapAction) {
String creatorTag = String.join(" ", Arrays.asList(creator.split(",|;")));
DataPullTask task = config.getTask(jobName, fileS3Path, jksFilePath,rotateSubnets(),getStepForPipeline()).withClusterProperties(properties).withCustomJar(customJarFilePath).haveBootstrapAction(haveBootstrapAction)
DataPullTask task = config.getTask(jobName, creator, fileS3Path, jksFilePath,rotateSubnets(),getStepForPipeline()).withClusterProperties(properties).withCustomJar(customJarFilePath).haveBootstrapAction(haveBootstrapAction)
.addTag("Creator", creatorTag).addTag("Env", Objects.toString(properties.getAwsEnv(), env)).addTag("Name", jobName)
.addTag("AssetProtectionLevel", "99").addTag("ComponentInfo", properties.getComponentInfo())
.addTag("Portfolio", properties.getPortfolio()).addTag("Product", properties.getProduct()).addTag("Team", properties.getTeam()).addTag("tool", "datapull")
@@ -18,15 +18,23 @@

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.model.*;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.homeaway.datapullclient.config.DataPullClientConfig;
import com.homeaway.datapullclient.config.DataPullProperties;
import com.homeaway.datapullclient.config.EMRProperties;
import com.homeaway.datapullclient.input.ClusterProperties;
import config.AppConfig;
import core.Controller;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;

import java.io.IOException;
import java.io.InputStream;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Collectors;
@@ -42,6 +50,8 @@ public class DataPullTask implements Runnable {

private final String jsonS3Path;

private final String creator;

private final String s3FilePath;

private final String jksS3Path;
@@ -59,7 +69,8 @@
private final List<String> subnets;
private final Map<String,List<DescribeStepRequest>> stepPipelineMap;

public DataPullTask(final String taskId, final String s3File, final String jksFilePath, final List<String> subnets, final Map<String,List<DescribeStepRequest>> stepPipelineMap) {
public DataPullTask(final String taskId, final String creator, final String s3File, final String jksFilePath, final List<String> subnets, final Map<String,List<DescribeStepRequest>> stepPipelineMap) {
this.creator = creator;
s3FilePath = s3File;
this.taskId = taskId;
jsonS3Path = this.s3FilePath + ".json";
@@ -370,9 +381,17 @@ private RunJobFlowResult runTaskInNewCluster(final AmazonElasticMapReduce emr, f
request.withBootstrapActions(bsConfig);
}
RunJobFlowResult result=emr.runJobFlow(request);
// Wait for the cluster to be ready
boolean isClusterReady = waitForClusterReady(emr, result.getJobFlowId());

if (!isClusterReady) {
String errorMessage = "EMR cluster failed to start. Aborting the data pull task.";
DataPullTask.log.error(errorMessage);
sendEmail(errorMessage, creator);
throw new RuntimeException(errorMessage);
}
ListStepsResult steps = emr.listSteps(new ListStepsRequest().withClusterId(result.getJobFlowId()));
StepSummary step = steps.getSteps().get(0);
DescribeStepRequest ds = new DescribeStepRequest();
ds.withClusterId(result.getJobFlowId());
ds.withStepId(step.getId());
@@ -382,6 +401,87 @@ private RunJobFlowResult runTaskInNewCluster(final AmazonElasticMapReduce emr, f
return result;
}


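/**
 * Added alongside the synchronous wait: builds a one-row HTML failure report from
 * the settings in application.yml and emails it to the job creator via the
 * DataMigrationFramework Controller.
 */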
private void sendEmail(String message, String creator) {
    // Read application.yml from the classpath; try-with-resources ensures the stream is closed
    try (InputStream inputStream = this.getClass().getClassLoader().getResourceAsStream("application.yml")) {
        if (inputStream == null) {
            DataPullTask.log.error("application.yml not found on the classpath; cannot send failure email.");
            return;
        }
        // Parse the YAML content into a JsonNode for AppConfig
        ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory());
        JsonNode applicationConf = yamlMapper.readTree(inputStream);
        AppConfig config = new AppConfig(applicationConf);

        String pipelineName = clusterProperties.getPipelineName();
        String env = clusterProperties.getAwsEnv();
        String applicationId = clusterProperties.getApplication();
        String subject = "Data Pull job failed for the pipeline " + pipelineName + " in " + env + " environment";

        // Build a one-row HTML report describing the failure
        StringBuilder reportBodyHtml = new StringBuilder();
        reportBodyHtml.append("<tr><td><h3>Errors!</h3></td><td>")
                .append(Instant.now().toString())
                .append("</td><td colspan=\"5\">")
                .append(message)
                .append("</td></tr>");

        Controller controllerInstance = new Controller(config, pipelineName);
        String htmlContent = controllerInstance.neatifyReportHtml(reportBodyHtml.toString(), true, true);
        controllerInstance.SendEmail(creator, htmlContent, applicationId, pipelineName, env, subject, "");
    } catch (IOException e) {
        DataPullTask.log.error("Error while sending email", e);
    }
}

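/**
 * Polls DescribeCluster until the cluster reaches WAITING or RUNNING, returning false if it
 * terminates first or is still not ready after maxRetries polls (60 x 30 s = 30 minutes with
 * the defaults below). This loop is what makes the run-job-flow request effectively synchronous.
 */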
private boolean waitForClusterReady(AmazonElasticMapReduce emrClient, String clusterId) {
DescribeClusterRequest describeRequest = new DescribeClusterRequest().withClusterId(clusterId);
int maxRetries = 60;
int retryIntervalSeconds = 30;
int retries = 0;

while (retries < maxRetries) {
try {
DescribeClusterResult describeResult = emrClient.describeCluster(describeRequest);
ClusterStatus status = describeResult.getCluster().getStatus();
String state = status.getState();
DataPullTask.log.info("Cluster {} is in state {}", clusterId, state);

switch (state) {
case "WAITING":
case "RUNNING":
DataPullTask.log.info("Cluster {} is ready.", clusterId);
return true;
case "TERMINATED_WITH_ERRORS":
case "TERMINATED":
String reason = status.getStateChangeReason().getMessage();
DataPullTask.log.error("Cluster {} failed to start. Reason: {}", clusterId, reason);
return false;
default:
// Cluster is still starting up
break;
}

// Wait before polling again
Thread.sleep(retryIntervalSeconds * 1000);
retries++;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
DataPullTask.log.error("Interrupted while waiting for cluster to start.", e);
return false;
} catch (Exception e) {
DataPullTask.log.error("Error while checking cluster status.", e);
return false;
}
}

DataPullTask.log.error("Cluster {} did not start within the expected time.", clusterId);
return false;
}

private JobFlowInstancesConfig getJobFlowInstancesConfig(EMRProperties emrProperties,
ClusterProperties clusterProperties,
DataPullProperties dataPullProperties) {
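As an aside, the hand-rolled polling loop in waitForClusterReady could also be expressed with the waiters that ship with the AWS SDK for Java v1. A minimal sketch, assuming the project's SDK version includes the com.amazonaws.waiters package (this helper is illustrative, not part of the PR):

    import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
    import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
    import com.amazonaws.waiters.WaiterParameters;
    import com.amazonaws.waiters.WaiterTimedOutException;
    import com.amazonaws.waiters.WaiterUnrecoverableException;

    final class ClusterWait {
        // Blocks until the cluster reaches RUNNING or WAITING, mirroring waitForClusterReady above.
        static boolean waitUsingSdkWaiter(AmazonElasticMapReduce emr, String clusterId) {
            try {
                emr.waiters()
                   .clusterRunning()
                   .run(new WaiterParameters<>(new DescribeClusterRequest().withClusterId(clusterId)));
                return true;
            } catch (WaiterUnrecoverableException e) {
                return false; // cluster went to TERMINATED / TERMINATED_WITH_ERRORS
            } catch (WaiterTimedOutException e) {
                return false; // cluster did not become ready within the waiter's polling budget
            }
        }
    }

One trade-off: the SDK waiter uses its own fixed polling schedule and does not surface the cluster's state-change reason, which the explicit loop above logs and emails to the creator.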