本文整理汇总了Python中databricks_test_helper.Test类的典型用法代码示例。如果您正苦于以下问题：Python Test类的具体用法？Python Test怎么用？Python Test使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Test类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1:
# COMMAND ----------
# TODO: Replace <FILL IN> with appropriate code
from pyspark.sql.functions import lit, concat
pluralDF = wordsDF.<FILL IN>
pluralDF.show()
# COMMAND ----------
# Load in the testing code and check to see if your answer is correct
# If incorrect it will report back '1 test failed' for each failed test
# Make sure to rerun any cell you change before trying the test again
from databricks_test_helper import Test
# TEST Using DataFrame functions to add an 's' (1b)
# first()[0] is the first field of the first row; it is expected to be 'cats'.
Test.assertEquals(pluralDF.first()[0], 'cats', 'incorrect result: you need to add an s')
# The DataFrame's column list must be exactly ['word'].
Test.assertEquals(pluralDF.columns, ['word'], "there should be one column named 'word'")
# COMMAND ----------
# MAGIC %md
# MAGIC **(1c) Length of each word**
# MAGIC
# MAGIC Now use the SQL `length` function to find the number of characters in each word. The [`length` function](http://spark.apache.org/docs/latest/api/python/pyspark.sql.html#pyspark.sql.functions.length) is found in the `pyspark.sql.functions` module.
# COMMAND ----------
# TODO: Replace <FILL IN> with appropriate code
from pyspark.sql.functions import length
pluralLengthsDF = pluralDF.<FILL IN>
pluralLengthsDF.show()
示例2: endpoints
# TEST Top ten error endpoints (4a)
# Keep the first two fields of each of the first ten rows as plain tuples so they
# can be compared directly with the expected list below.
top_10_err_urls = [(row[0], row[1]) for row in logs_sum_df.take(10)]
# Expected rows, in order; presumably (endpoint path, error count) pairs -- verify
# against the query that builds logs_sum_df.
top_10_err_expected = [
(u'/images/NASA-logosmall.gif', 8761),
(u'/images/KSC-logosmall.gif', 7236),
(u'/images/MOSAIC-logosmall.gif', 5197),
(u'/images/USA-logosmall.gif', 5157),
(u'/images/WORLD-logosmall.gif', 5020),
(u'/images/ksclogo-medium.gif', 4728),
(u'/history/apollo/images/apollo-logo1.gif', 2907),
(u'/images/launch-logo.gif', 2811),
(u'/', 2199),
(u'/images/ksclogosmall.gif', 1622)
]
# Check the total number of rows first, then the exact content and order of the
# ten rows taken above.
Test.assertEquals(logs_sum_df.count(), 7675, 'incorrect count for logs_sum_df')
Test.assertEquals(top_10_err_urls, top_10_err_expected, 'incorrect Top Ten failed URLs')
# COMMAND ----------
# MAGIC %md
# MAGIC ### (4b) Exercise: Number of Unique Hosts
# MAGIC
# MAGIC How many unique hosts are there in the entire log?
# MAGIC
# MAGIC There are multiple ways to find this. Try to find a more efficient way than grouping by 'host'.
# COMMAND ----------
# TODO: Replace <FILL IN> with appropriate code
unique_host_count = <FILL IN>
示例3: makePlural
# COMMAND ----------
# One way of completing the function
def makePlural(word):
    """Return the plural form of ``word`` by appending an 's'.

    Args:
        word (str): A single word.

    Returns:
        str: ``word`` followed by the letter 's'.
    """
    return word + 's'
print makePlural('cat')
# COMMAND ----------
# Load in the testing code and check to see if your answer is correct
# If incorrect it will report back '1 test failed' for each failed test
# Make sure to rerun any cell you change before trying the test again
from databricks_test_helper import Test
# TEST Pluralize and test (1b)
Test.assertEquals(makePlural('rat'), 'rats', 'incorrect result: makePlural does not add an s')
# COMMAND ----------
# MAGIC %md
# MAGIC ### (1c) Apply `makePlural` to the base RDD
# MAGIC
# MAGIC Now pass each item in the base RDD into a [map()](http://spark.apache.org/docs/latest/api/python/pyspark.html#pyspark.RDD.map) transformation that applies the `makePlural()` function to each element. Then call the [collect()](http://spark.apache.org/docs/latest/api/python/pyspark.html#pyspark.RDD.collect) action to see the transformed RDD.
# COMMAND ----------
# TODO: Replace <FILL IN> with appropriate code
pluralRDD = wordsRDD.map(<FILL IN>)
print pluralRDD.collect()
# COMMAND ----------
示例4: vectors
# COMMAND ----------
# ANSWER
# Manually calculate your answer and represent the vector as a list of integers.
# For example, [2, 4, 8].
vectorX = [3, -6, 0]
vectorY = [4, 8, 16]
# COMMAND ----------
# TEST Scalar multiplication: vectors (1a)
# Import test library
from databricks_test_helper import Test
# Each vector is checked against a precomputed 40-character hex digest rather than
# a literal expected value; presumably a SHA-1 hash -- confirm in
# databricks_test_helper.
Test.assertEqualsHashed(vectorX, 'e460f5b87531a2b60e0f55c31b2e49914f779981',
'incorrect value for vectorX')
Test.assertEqualsHashed(vectorY, 'e2d37ff11427dbac7f833a5a7039c0de5a740b1e',
'incorrect value for vectorY')
# COMMAND ----------
# PRIVATE_TEST Scalar multiplication: vectors (1a)
# Same two checks as the cell above, with identical digests and messages.
Test.assertEqualsHashed(vectorX, 'e460f5b87531a2b60e0f55c31b2e49914f779981',
'incorrect value for vectorX')
Test.assertEqualsHashed(vectorY, 'e2d37ff11427dbac7f833a5a7039c0de5a740b1e',
'incorrect value for vectorY')
# COMMAND ----------
# MAGIC %md
# MAGIC ### (1b) Element-wise multiplication: vectors
示例5: step
# MAGIC %md
# MAGIC **(3a) Compare with hash**
# MAGIC
# MAGIC Run the following cell. If you see an **ImportError**, you should verify that you added the spark_mooc_meta library to your cluster and, if necessary, repeat step (1a).
# MAGIC
# MAGIC <img src="http://spark-mooc.github.io/web-assets/images/Lab0_LibraryError.png" alt="Drawing" style="width: 600px;"/>
# COMMAND ----------
# TEST Compare with hash (2a)
# Check our testing library/package
# This should print '1 test passed.' on two lines
from databricks_test_helper import Test
twelve = 12
# Plain equality check against the literal value.
Test.assertEquals(twelve, 12, 'twelve should equal 12')
# Same value, compared via a 40-character hex digest; presumably SHA-1 -- confirm
# in databricks_test_helper.
Test.assertEqualsHashed(twelve, '7b52009b64fd0a2a49e6d8a939753077792b0554',
'twelve, once hashed, should equal the hashed value of 12')
# COMMAND ----------
# MAGIC %md
# MAGIC **(3b) Compare lists**
# COMMAND ----------
# TEST Compare lists (2b)
# This should print '1 test passed.'
# Tuples sort element by element, so ties on the first field are broken by the
# second: (5, 'a') comes before (5, 'b') in the expected result.
unsortedList = [(5, 'b'), (5, 'a'), (4, 'c'), (3, 'a')]
Test.assertEquals(sorted(unsortedList), [(3, 'a'), (4, 'c'), (5, 'a'), (5, 'b')],
'unsortedList does not sort properly')
示例6: Ratings
# Group the ratings by 'movieId' and compute two aggregates per group: the number
# of ratings (aliased "count") and their mean (aliased "average").
movie_ids_with_avg_ratings_df = ratings_df.groupBy('movieId').agg(F.count(ratings_df.rating).alias("count"), F.avg(ratings_df.rating).alias("average"))
print 'movie_ids_with_avg_ratings_df:'
movie_ids_with_avg_ratings_df.show(3, truncate=False)
# Note: movie_names_df is a temporary variable, used only to separate the steps necessary
# to create the movie_names_with_avg_ratings_df DataFrame.
# The join matches the aggregate's "movieId" against movies_df's "Id"; the "Id"
# column is then dropped from the joined result.
movie_names_df = movie_ids_with_avg_ratings_df.join(movies_df,movie_ids_with_avg_ratings_df["movieId"]==movies_df["Id"])
movie_names_with_avg_ratings_df = movie_names_df.drop("Id")
print 'movie_names_with_avg_ratings_df:'
movie_names_with_avg_ratings_df.show(3, truncate=False)
# COMMAND ----------
# TEST Movies with Highest Average Ratings (1a)
# The aggregated DataFrame is expected to hold 26744 rows.
Test.assertEquals(movie_ids_with_avg_ratings_df.count(), 26744,
'incorrect movie_ids_with_avg_ratings_df.count() (expected 26744)')
# Inspect the first three rows when ordered by movie ID.
# NOTE(review): the grouping column is created as 'movieId' but is referenced here
# as 'MovieID'; this presumably relies on case-insensitive column resolution --
# confirm.
movie_ids_with_ratings_take_ordered = movie_ids_with_avg_ratings_df.orderBy('MovieID').take(3)
_take_0 = movie_ids_with_ratings_take_ordered[0]
_take_1 = movie_ids_with_ratings_take_ordered[1]
_take_2 = movie_ids_with_ratings_take_ordered[2]
# Row fields are read by position: [0] movie ID, [1] rating count, [2] average
# rating. Averages are rounded to two decimals before being compared.
Test.assertTrue(_take_0[0] == 1 and _take_0[1] == 49695,
'incorrect count of ratings for movie with ID {0} (expected 49695)'.format(_take_0[0]))
Test.assertEquals(round(_take_0[2], 2), 3.92, "Incorrect average for movie ID {0}. Expected 3.92".format(_take_0[0]))
Test.assertTrue(_take_1[0] == 2 and _take_1[1] == 22243,
'incorrect count of ratings for movie with ID {0} (expected 22243)'.format(_take_1[0]))
Test.assertEquals(round(_take_1[2], 2), 3.21, "Incorrect average for movie ID {0}. Expected 3.21".format(_take_1[0]))
Test.assertTrue(_take_2[0] == 3 and _take_2[1] == 12735,
'incorrect count of ratings for movie with ID {0} (expected 12735)'.format(_take_2[0]))
Test.assertEquals(round(_take_2[2], 2), 3.15, "Incorrect average for movie ID {0}. Expected 3.15".format(_take_2[0]))
示例7: hash
# TEST Compare with hash (2a)
# Check our testing library/package
# This should print '1 test passed.' on two lines
from databricks_test_helper import Test
twelve = 12
# Plain equality check against the literal value.
Test.assertEquals(twelve, 12, 'twelve should equal 12')
# Same value, compared via a 40-character hex digest; presumably SHA-1 -- confirm
# in databricks_test_helper.
Test.assertEqualsHashed(twelve, '7b52009b64fd0a2a49e6d8a939753077792b0554',
'twelve, once hashed, should equal the hashed value of 12')