1

I run the following command on Databricks Notebook with com.amazon.deequ:deequ:2.0.0-spark-3.1 library for checking data quality on input data, and I got error messages on certain functions a member of com.amazon.deequ.VerificationRunBuilder. Where are those checks such as isGreaterThanOrEqualTo, hasDataType, hasMinLength exist? I did check the https://github.com/awslabs/deequ/blob/master/src/main/scala/com/amazon/deequ/checks/Check.scala and they do exist in there.

%scala

import com.amazon.deequ.{VerificationSuite, VerificationResult}
import com.amazon.deequ.VerificationResult.checkResultsAsDataFrame
import com.amazon.deequ.checks.{Check, CheckLevel, CheckStatus}
import com.amazon.deequ.suggestions.{ConstraintSuggestionRunner, Rules}
import com.amazon.deequ.constraints.Constraint;

val verificationResult: VerificationResult = { VerificationSuite()
  // data to run the verification on
  .onData(df)
  // define a data quality check
  .addCheck(
    Check(CheckLevel.Error, "unitTest") 
      //.hasSize(_ >= 2) // at least 100 rows
      .hasMax("prem_amt", _ <= 2000) // max is 10000
      .hasMin("prem_amt", _ >= 1000) // max is 10000    
      //.hasCompleteness("pol_nbr", _ >= 0.95) // 95%+ non-null IPs
      .isNonNegative("prem_amt")) // should not contain negative values
      .hasMinLength("pol_nbr", _ <= 8) // minimum length is 8 
      .hasMaxLength("pol_nbr", _ <= 8) // maximum length is 8  
      .hasDataType("trans_eff_dt", ConstrainableDataTypes.Date)
      .isGreaterThanOrEqualTo("trans_eff_dt","pol_eff_dt")
  // compute metrics and verify check conditions
  .run()
}

// convert check results to a Spark data frame
val resultDataFrame = checkResultsAsDataFrame(spark, verificationResult)

resultDataFrame.show(truncate=false)

VerificationResult.successMetricsAsDataFrame(spark, verificationResult).show(truncate=false)
fullysane
  • 51
  • 1
  • 1
    is there supposed to be two closing `)` in ` .isNonNegative("prem_amt"))`? – mazaneicha Feb 02 '22 at 22:54
  • @mazaneicha: yes, your are right! thank you! I updated my code and I got error: not found: value ConstrainableDataTypes .hasDataType("prem_amt", ConstrainableDataTypes.Integral). How do I check a field's data type? – fullysane Feb 03 '22 at 15:51

0 Answers0