本文整理汇总了 Python 中 test_helper.Test.assertTrue 方法的典型用法代码示例。如果您正苦于以下问题:Python Test.assertTrue 方法的具体用法?Python Test.assertTrue 怎么用?Python Test.assertTrue 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 test_helper.Test 的用法示例。
在下文中一共展示了 Test.assertTrue 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: run_tests
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
def run_tests():
    """Run the grader checks for ``test_year`` against known answers.

    Compares the lists returned by ``test_year(year, df)`` for 1945, 1970 and
    1987 with hard-coded expected names, applies three sanity checks to the
    1945 result (length, absence of u'James', and inequality with a specific
    wrong list), and checks the 1880 result through a hashed comparison.

    ``test_year``, ``df`` and ``Test`` are not defined in this block; they are
    expected to exist in the enclosing module.
    """
    # The 1945 result is needed by four separate checks. Compute it once
    # instead of re-running the same query for every assertion.
    names_1945 = test_year(1945, df)

    Test.assertEquals(names_1945, [u'Mary', u'Linda', u'Barbara', u'Patricia', u'Carol'], 'incorrect top 5 names for 1945')
    Test.assertEquals(test_year(1970, df), [u'Jennifer', u'Lisa', u'Kimberly', u'Michelle', u'Amy'], 'incorrect top 5 names for 1970')
    Test.assertEquals(test_year(1987, df), [u'Jessica', u'Ashley', u'Amanda', u'Jennifer', u'Sarah'], 'incorrect top 5 names for 1987')

    # Sanity checks on the 1945 result; each message names the mistake it is
    # meant to catch.
    Test.assertTrue(len(names_1945) <= 5, 'list not limited to 5 names')
    Test.assertTrue(u'James' not in names_1945, 'male names not filtered')
    Test.assertTrue(names_1945 != [u'Linda', u'Linda', u'Linda', u'Linda', u'Mary'], 'year not filtered')

    # The expected 1880 answer is only available as a hash, so the comparison
    # goes through assertEqualsHashed.
    Test.assertEqualsHashed(test_year(1880, df), "2038e2c0bb0b741797a47837c0f94dbf24123447", "incorrect top 5 names for 1880")
示例2: SparseVector
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# Dense vectors and their sparse counterparts. Each SparseVector here is built
# from a size plus a list of [index, value] pairs that spells out every index,
# zeros included.
aDense = np.array([0., 3., 0., 4.])
aSparse = SparseVector(4, [[0,0.], [1,3.], [2,0.], [3,4.]])
bDense = np.array([0., 0., 0., 1.])
bSparse = SparseVector(4, [[0,0.], [1,0.], [2,0.], [3,1.]])
w = np.array([0.4, 3.1, -1.4, -.5])
# Single-argument print(...) produces the same output as the Python 2 print
# statement and also parses under Python 3.
print(aDense.dot(w))
print(aSparse.dot(w))
print(bDense.dot(w))
print(bSparse.dot(w))
# TEST Sparse Vectors
Test.assertTrue(isinstance(aSparse, SparseVector), 'aSparse needs to be an instance of SparseVector')
# Fix: this check inspects bSparse, so its failure message must name bSparse
# (it previously said 'aSparse', a copy-paste slip).
Test.assertTrue(isinstance(bSparse, SparseVector), 'bSparse needs to be an instance of SparseVector')
# NOTE(review): the dot products are compared with exact float equality, as in
# the original; np.allclose would be more tolerant if this ever proves flaky.
Test.assertTrue(aDense.dot(w) == aSparse.dot(w),
                'dot product of aDense and w should equal dot product of aSparse and w')
Test.assertTrue(bDense.dot(w) == bSparse.dot(w),
                'dot product of bDense and w should equal dot product of bSparse and w')
# ** OHE features as sparse vectors **
# Alternative constructor form: size, list of indices, list of values.
sampleOneOHEFeatManual = SparseVector(7,[2,3],[1.0,1.0])
sampleTwoOHEFeatManual = SparseVector(7,[1,4,5],[1.0,1.0,1.0])
sampleThreeOHEFeatManual = SparseVector(7,[0,3,6],[1.0,1.0,1.0])
# TEST OHE Features as sparse vectors
Test.assertTrue(isinstance(sampleOneOHEFeatManual, SparseVector),
示例3: lines
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# In[80]:
# TODO: Replace <FILL IN> with appropriate code
# Split every element of shakespeareRDD on single spaces and flatten the
# pieces into one collection of words. str.split(" ") yields '' for leading,
# trailing or repeated spaces, so empty strings are still present here.
shakespeareWordsRDD = shakespeareRDD.flatMap(lambda x: x.split(" "))
shakespeareWordCount = shakespeareWordsRDD.count()
# Python 2 print statements: show the result of top(5) and the total count.
print shakespeareWordsRDD.top(5)
print shakespeareWordCount
# In[81]:
# TEST Words from lines (4d)
# This test allows for leading spaces to be removed either before or after
# punctuation is removed.
Test.assertTrue(shakespeareWordCount == 927631 or shakespeareWordCount == 928908,
'incorrect value for shakespeareWordCount')
Test.assertEquals(shakespeareWordsRDD.top(5),
[u'zwaggerd', u'zounds', u'zounds', u'zounds', u'zounds'],
'incorrect value for shakespeareWordsRDD')
# #### ** (4e) Remove empty elements **
# #### The next step is to filter out the empty elements. Remove all entries where the word is `''`.
# In[82]:
# TODO: Replace <FILL IN> with appropriate code
# Keep only the entries whose length is greater than zero, then count them.
shakeWordsRDD = shakespeareWordsRDD.filter(lambda x:len(x) > 0)
shakeWordCount = shakeWordsRDD.count()
print shakeWordCount
示例4: estimateCovariance
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# Scatter-plot column 0 of dataCorrelated against column 1.
plt.scatter(dataCorrelated[:,0], dataCorrelated[:,1], s=14**2, c='#d6ebf2',
edgecolors='#8cbfd0', alpha=0.75)
# No-op statement; presumably just ends the notebook cell -- confirm in the notebook.
pass
# Distribute the data, compute its mean, and centre every element by
# subtracting that mean.
correlatedData = sc.parallelize(dataCorrelated)
meanCorrelated = correlatedData.mean()
correlatedDataZeroMean = correlatedData.map(lambda x:np.subtract(x,meanCorrelated))
print meanCorrelated
print correlatedData.take(1)
print correlatedDataZeroMean.take(1)
from test_helper import Test
# np.allclose compares within a floating-point tolerance rather than exactly.
Test.assertTrue(np.allclose(meanCorrelated, [49.95739037, 49.97180477]),
'incorrect value for meanCorrelated')
Test.assertTrue(np.allclose(correlatedDataZeroMean.take(1)[0], [-0.28561917, 0.10351492]),
'incorrect value for correlatedDataZeroMean')
# Covariance estimate: sum of the outer products of the centred elements,
# divided by the number of elements.
correlatedCov = correlatedDataZeroMean.map(lambda x: np.outer(x,x)).reduce(lambda x,y:x+y)/correlatedDataZeroMean.count()
print correlatedCov
covResult = [[ 0.99558386, 0.90148989], [0.90148989, 1.08607497]]
Test.assertTrue(np.allclose(covResult, correlatedCov), 'incorrect value for correlatedCov')
def estimateCovariance(data):
    """Estimate the covariance matrix of the elements in ``data``.

    Subtracts the mean element from every element, sums the outer product of
    each centred element with itself, and divides that sum by the number of
    elements (the normaliser is n, not n - 1).

    Args:
        data: Collection exposing ``mean()``, ``map()`` and ``count()`` whose
            mapped result exposes ``reduce()`` -- presumably a Spark RDD of
            equal-length NumPy arrays; confirm against the caller.

    Returns:
        The summed outer products divided by the element count; for 1-D
        elements of length d this is a d x d NumPy array.
    """
    meanData = data.mean()
    zeroMeanData = data.map(lambda row: np.subtract(row, meanData))
    outerSum = zeroMeanData.map(lambda row: np.outer(row, row)).reduce(lambda a, b: a + b)
    # map() yields exactly one output per input, so data.count() equals
    # zeroMeanData.count() without re-running the centring step just to count.
    return outerSum / data.count()
示例5: display
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# ANSWER
from pyspark.ml.feature import Normalizer
# Configure a Normalizer that reads column 'features', writes column
# 'featureNorm' and uses p = 2.0.
normalizer = (Normalizer()
.setInputCol('features')
.setOutputCol('featureNorm')
.setP(2.0))
irisNormalized = normalizer.transform(irisTwoFeatures) # Note that we're calling transform here
display(irisNormalized)
# COMMAND ----------
# TEST
import numpy as np
# Take the 'featureNorm' value of the first row and check that its 2-norm is
# (approximately) 1.0.
# NOTE(review): .map() is called directly on the result of select(); this
# presumably targets Spark 1.x, where DataFrames exposed map() -- confirm the
# Spark version before reusing this line.
firstVector = irisNormalized.select('featureNorm').map(lambda r: r[0]).first()
Test.assertTrue(np.allclose(firstVector.norm(2.0), 1.0), 'incorrect setup of normalizer')
# COMMAND ----------
# MAGIC %md
# MAGIC ## Part 3
# COMMAND ----------
# MAGIC %md
# MAGIC Let's just check and see that our norms are equal to 1.0
# COMMAND ----------
# UDF that returns a vector's 2-norm as a Python float, declared as DoubleType.
l2Norm = udf(lambda v: float(v.norm(2.0)), DoubleType())
示例6: idfs
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# Apply idfs() (defined elsewhere) to the union of the two small token
# datasets; per the test messages below, each result entry is a
# (token, IDF value) pair.
idfsSmall = idfs(amazonRecToToken.union(googleRecToToken))
uniqueTokenCount = idfsSmall.count()
# takeOrdered(1, key) returns a one-element list; [0] picks the entry with the
# smallest second field.
print idfsSmall.takeOrdered(1, lambda s: s[1])[0]
print 'There are %s unique tokens in the small datasets.' % uniqueTokenCount
# In[16]:
# TEST Implement an IDFs function (2c)
Test.assertEquals(uniqueTokenCount, 4772, 'incorrect uniqueTokenCount')
tokenSmallestIdf = idfsSmall.takeOrdered(1, lambda s: s[1])[0]
Test.assertEquals(tokenSmallestIdf[0], 'software', 'incorrect smallest IDF token')
# The float is compared with an absolute tolerance of 1e-10 instead of ==.
Test.assertTrue(abs(tokenSmallestIdf[1] - 4.25531914894) < 0.0000000001,
'incorrect smallest IDF value')
# ### **(2d) Tokens with the smallest IDF**
# #### Print out the 11 tokens with the smallest IDF in the combined small dataset.
# In[17]:
smallIDFTokens = idfsSmall.takeOrdered(11, lambda s: s[1])
print smallIDFTokens
# ### **(2e) IDF Histogram**
# #### Plot a histogram of IDF values. Be sure to use appropriate scaling and bucketing for the data.
# #### First plot the histogram using `matplotlib`
示例7: Ratings
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# (average rating, movie name, number of ratings)
# After the join each record is (key, (moviesRDD value, avg-ratings value)).
# The map below reorders it into (log[1][1][1], log[1][0], log[1][1][0]); the
# expected values further down show the avg-ratings value is (count, average).
movieNameWithAvgRatingsRDD = (moviesRDD
.join(movieIDsWithAvgRatingsRDD)
.map(lambda log: (log[1][1][1], log[1][0], log[1][1][0])))
print 'movieNameWithAvgRatingsRDD: %s\n' % movieNameWithAvgRatingsRDD.take(3)
# In[26]:
# TEST Movies with Highest Average Ratings (1b)
Test.assertEquals(movieIDsWithRatingsRDD.count(), 3615,
'incorrect movieIDsWithRatingsRDD.count() (expected 3615)')
movieIDsWithRatingsTakeOrdered = movieIDsWithRatingsRDD.takeOrdered(3)
# Each entry is (key, iterable of ratings); the iterable is materialised with
# list() so that its length can be checked.
Test.assertTrue(movieIDsWithRatingsTakeOrdered[0][0] == 1 and
len(list(movieIDsWithRatingsTakeOrdered[0][1])) == 993,
'incorrect count of ratings for movieIDsWithRatingsTakeOrdered[0] (expected 993)')
Test.assertTrue(movieIDsWithRatingsTakeOrdered[1][0] == 2 and
len(list(movieIDsWithRatingsTakeOrdered[1][1])) == 332,
'incorrect count of ratings for movieIDsWithRatingsTakeOrdered[1] (expected 332)')
Test.assertTrue(movieIDsWithRatingsTakeOrdered[2][0] == 3 and
len(list(movieIDsWithRatingsTakeOrdered[2][1])) == 299,
'incorrect count of ratings for movieIDsWithRatingsTakeOrdered[2] (expected 299)')
Test.assertEquals(movieIDsWithAvgRatingsRDD.count(), 3615,
'incorrect movieIDsWithAvgRatingsRDD.count() (expected 3615)')
# NOTE(review): the averages below are compared with exact float equality.
Test.assertEquals(movieIDsWithAvgRatingsRDD.takeOrdered(3),
[(1, (993, 4.145015105740181)), (2, (332, 3.174698795180723)),
(3, (299, 3.0468227424749164))],
'incorrect movieIDsWithAvgRatingsRDD.takeOrdered(3)')
示例8: LabeledPoint
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
label = line_array.pop(0)
return LabeledPoint(label, np.array(line_array))
# Parse every sample with parsePoint and inspect the first result.
# NOTE(review): indexing the result of map() relies on Python 2, where map()
# returns a list; under Python 3 it returns an iterator and [0] would fail.
parsedSamplePoints = map(parsePoint, samplePoints)
firstPointFeatures = parsedSamplePoints[0].features
firstPointLabel = parsedSamplePoints[0].label
print firstPointFeatures, firstPointLabel
# d is the number of entries in the first point's feature vector.
d = len(firstPointFeatures)
print d
# In[16]:
# TEST Using LabeledPoint (1b)
Test.assertTrue(isinstance(firstPointLabel, float), 'label must be a float')
expectedX0 = [0.8841,0.6105,0.6005,0.4747,0.2472,0.3573,0.3441,0.3396,0.6009,0.4257,0.6049,0.4192]
# The two extra positional arguments to np.allclose are rtol and atol (both 1e-4).
Test.assertTrue(np.allclose(expectedX0, firstPointFeatures, 1e-4, 1e-4),
'incorrect features for firstPointFeatures')
Test.assertTrue(np.allclose(2001.0, firstPointLabel), 'incorrect label for firstPointLabel')
Test.assertTrue(d == 12, 'incorrect number of features')
# #### **Visualization 1: Features**
# #### First we will load and set up the visualization library. Then we will look at the raw features for 50 data points by generating a heatmap that visualizes each feature on a grey-scale and shows the variation of each feature across the 50 sample data points. The features are all between 0 and 1, with values closer to 1 represented via darker shades of grey.
# In[17]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
示例9:
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# COMMAND ----------
# TODO: Replace <FILL IN> with appropriate code
# NOTE: this cell is an exercise template. The <FILL IN> markers are
# placeholders, so the cell is not valid Python until each one is replaced.
from pyspark.ml.clustering import KMeans
# Create a KMeans Estimator and set k=3, seed=5, maxIter=20, initSteps=1
kmeans = (<FILL IN> # create KMeans
<FILL IN> # set K
<FILL IN> # seed
<FILL IN> # maxIter
<FILL IN>) # initSteps
# Call fit on the estimator and pass in our DataFrame
model = <FILL IN>
# Obtain the clusterCenters from the KMeansModel
centers = <FILL IN>
# Use the model to transform the DataFrame by adding cluster predictions
transformed = <FILL IN>
print centers
# COMMAND ----------
# TEST
import numpy as np
# Checks the first cluster centre (within np.allclose tolerance) and the
# 'prediction' values of the first four rows.
# NOTE(review): .map() is called directly on the result of select(); this
# presumably targets Spark 1.x -- confirm the Spark version before reuse.
Test.assertTrue(np.allclose([ 0.35115296, -0.10691828], centers[0]),
'incorrect centers. check your params.')
Test.assertEquals(transformed.select('prediction').map(lambda r: r[0]).take(4), [1,1,1,1],
'incorrect predictions')
示例10: display
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# NOTE: this cell is an exercise template. The <FILL IN> markers are
# placeholders, so the cell is not valid Python until each one is replaced.
from pyspark.ml.feature import StringIndexer
stringIndexer = (<FILL IN>
.<FILL IN>
.<FILL IN>)
indexerModel = stringIndexer.<FILL IN>
irisTrainIndexed = indexerModel.<FILL IN>
display(irisTrainIndexed)
# COMMAND ----------
# TEST
from test_helper import Test
# Checks the 'indexed' value of the 50th row taken, and that the third schema
# field (index 2) carries non-empty metadata.
Test.assertEquals(irisTrainIndexed.select('indexed').take(50)[-1][0], 2.0, 'incorrect values in indexed column')
Test.assertTrue(irisTrainIndexed.schema.fields[2].metadata != {}, 'indexed should have metadata')
# COMMAND ----------
# MAGIC %md
# MAGIC We've updated the metadata for the field. Now we know that the field takes on three values and is nominal.
# COMMAND ----------
# Print the metadata of schema fields 1 and 2 for comparison.
print irisTrainIndexed.schema.fields[1].metadata
print irisTrainIndexed.schema.fields[2].metadata
# COMMAND ----------
# MAGIC %md
# MAGIC Let's build a decision tree to classify our data.
示例11: display
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
display(irisStandardizedLength)
# COMMAND ----------
display(irisStandardizedLength.describe('sepalLength', 'standardizedLength'))
# COMMAND ----------
# MAGIC %md
# MAGIC What if instead we wanted to normalize the data? For example, we might want to normalize each set of features (per row) to have length one using an \\( l^2 \\) norm. That would cause the sum of the features squared to be one: \\( \sum_{i=1}^d x_i^2 = 1 \\). This could be useful if we wanted to compare observations based on a distance metric like in k-means clustering.
# MAGIC
# MAGIC Normalizer can be found in [pyspark.ml.feature](https://spark.apache.org/docs/latest/api/python/pyspark.ml.html#pyspark.ml.feature.Normalizer) for Python and the [org.apache.spark.ml.feature](http://spark.apache.org/docs/latest/api/scala/#org.apache.spark.ml.feature.Normalizer) package for Scala.
# MAGIC
# MAGIC Let's implement `Normalizer` and transform our features. Make sure to use a `P` of 2.0 and to name the output column to "featureNorm". Remember that we're working with the `irisTwoFeatures` dataset.
# COMMAND ----------
# TODO: Replace <FILL IN> with appropriate code
# NOTE: this cell is an exercise template. The <FILL IN> marker is a
# placeholder, so the cell is not valid Python until it is replaced.
from pyspark.ml.feature import Normalizer
normalizer = (<FILL IN>)
irisNormalized = normalizer.transform(irisTwoFeatures) # Note that we're calling transform here
display(irisNormalized)
# COMMAND ----------
# TEST
import numpy as np
# Take the 'featureNorm' value of the first row and check that its 2-norm is
# (approximately) 1.0.
# NOTE(review): .map() is called directly on the result of select(); this
# presumably targets Spark 1.x -- confirm the Spark version before reuse.
firstVector = irisNormalized.select('featureNorm').map(lambda r: r[0]).first()
Test.assertTrue(np.allclose(firstVector.norm(2.0), 1.0), 'incorrect setup of normalizer')
示例12:
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# NOTE: this cell is an exercise template. The <FILL IN> markers are
# placeholders, so the cell is not valid Python until each one is replaced.
# Call fit on the estimator and pass in our DataFrame
model = <FILL IN>
# Obtain the clusterCenters from the KMeansModel
centers = <FILL IN>
# Use the model to transform the DataFrame by adding cluster predictions
transformed = <FILL IN>
print centers
# COMMAND ----------
# TEST
import numpy as np
# Checks the first cluster centre (within np.allclose tolerance) and the
# 'prediction' values of the first four rows.
# NOTE(review): .map() is called directly on the result of select(); this
# presumably targets Spark 1.x -- confirm the Spark version before reuse.
Test.assertTrue(np.allclose([ 0.35115296, -0.10691828], centers[0]),
'incorrect centers. check your params.')
Test.assertEquals(transformed.select('prediction').map(lambda r: r[0]).take(4), [1,1,1,1],
'incorrect predictions')
# COMMAND ----------
# MAGIC %md
# MAGIC ## PART 3
# COMMAND ----------
# MAGIC %md
# MAGIC From the class hierarchy it is clear that `KMeans` is an `Estimator` while `KMeansModel` is a `Transformer`.
# COMMAND ----------
示例13: len
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
features.
"""
<FILL IN>
# NOTE: this cell is an exercise template. The <FILL IN> markers are
# placeholders, so the cell is not valid Python until each one is replaced.
parsedSamplePoints = <FILL IN>
firstPointFeatures = <FILL IN>
firstPointLabel = <FILL IN>
print firstPointFeatures, firstPointLabel
# d is the number of entries in the first point's feature vector.
d = len(firstPointFeatures)
print d
# COMMAND ----------
# TEST Using LabeledPoint (1b)
Test.assertTrue(isinstance(firstPointLabel, float), 'label must be a float')
expectedX0 = [0.8841,0.6105,0.6005,0.4747,0.2472,0.3573,0.3441,0.3396,0.6009,0.4257,0.6049,0.4192]
# The two extra positional arguments to np.allclose are rtol and atol (both 1e-4).
Test.assertTrue(np.allclose(expectedX0, firstPointFeatures, 1e-4, 1e-4),
'incorrect features for firstPointFeatures')
Test.assertTrue(np.allclose(2001.0, firstPointLabel), 'incorrect label for firstPointLabel')
Test.assertTrue(d == 12, 'incorrect number of features')
# COMMAND ----------
# MAGIC %md
# MAGIC
# MAGIC **Visualization 1: Features**
# MAGIC
# MAGIC First we will load and set up the visualization library. Then we will look at the raw features for 50 data points by generating a heatmap that visualizes each feature on a grey-scale and shows the variation of each feature across the 50 sample data points. The features are all between 0 and 1, with values closer to 1 represented via darker shades of grey.
# COMMAND ----------
示例14: len
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
# Build (x[1], x[2]) pairs from each ratingsRDD record and group them by key;
# per the variable names and tests below, the key is a movie ID and the
# grouped values are its ratings.
movieIDsWithRatingsRDD = (ratingsRDD.map(lambda x: (x[1],x[2]))).groupByKey()
print 'movieIDsWithRatingsRDD: {}\n'.format(movieIDsWithRatingsRDD.take(3))
# getCountsAndAverages is defined elsewhere; the expected values below show it
# yields (key, (count, average)) records.
movieIDsWithAvgRatingsRDD = movieIDsWithRatingsRDD.map(getCountsAndAverages)
print 'movieIDsWithAvgRatingsRDD: {}\n'.format(movieIDsWithAvgRatingsRDD.take(3))
# Join on the key, reorder each record into (x[1][1][1], x[1][0], x[1][1][0]),
# then sort by the second element of the reordered tuple (the moviesRDD value).
movieNameWithAvgRatingsRDD = (moviesRDD.join(movieIDsWithAvgRatingsRDD)).map(lambda x: (x[1][1][1],x[1][0],x[1][1][0])).sortBy(lambda x: x[1])
print 'movieNameWithAvgRatingsRDD: {}\n'.format(movieNameWithAvgRatingsRDD.take(3))
print movieNameWithAvgRatingsRDD.takeOrdered(3)
Test.assertEquals(movieIDsWithRatingsRDD.count(), 3615,
'incorrect movieIDsWithRatingsRDD.count() (expected 3615)')
movieIDsWithRatingsTakeOrdered = movieIDsWithRatingsRDD.takeOrdered(3)
# Each entry is (key, iterable of ratings); the iterable is materialised with
# list() so that its length can be checked.
Test.assertTrue(movieIDsWithRatingsTakeOrdered[0][0] == 1 and
len(list(movieIDsWithRatingsTakeOrdered[0][1])) == 993,
'incorrect count of ratings for movieIDsWithRatingsTakeOrdered[0] (expected 993)')
Test.assertTrue(movieIDsWithRatingsTakeOrdered[1][0] == 2 and
len(list(movieIDsWithRatingsTakeOrdered[1][1])) == 332,
'incorrect count of ratings for movieIDsWithRatingsTakeOrdered[1] (expected 332)')
Test.assertTrue(movieIDsWithRatingsTakeOrdered[2][0] == 3 and
len(list(movieIDsWithRatingsTakeOrdered[2][1])) == 299,
'incorrect count of ratings for movieIDsWithRatingsTakeOrdered[2] (expected 299)')
Test.assertEquals(movieIDsWithAvgRatingsRDD.count(), 3615,
'incorrect movieIDsWithAvgRatingsRDD.count() (expected 3615)')
# NOTE(review): the averages below are compared with exact float equality.
Test.assertEquals(movieIDsWithAvgRatingsRDD.takeOrdered(3),
[(1, (993, 4.145015105740181)), (2, (332, 3.174698795180723)),
(3, (299, 3.0468227424749164))],
'incorrect movieIDsWithAvgRatingsRDD.takeOrdered(3)')
示例15: Ratings
# 需要导入模块: from test_helper import Test [as 别名]
# 或者: from test_helper.Test import assertTrue [as 别名]
titleAndRatingsTuple[1][1],
titleAndRatingsTuple[0],
titleAndRatingsTuple[1][0],
)
)
# Python 2 print statement: show the first three joined records.
print "movieNameWithAvgRatingsRDD: %s\n" % movieNameWithAvgRatingsRDD.take(3)
# In[25]:
# TEST Movies with Highest Average Ratings (1b)
Test.assertEquals(movieIDsWithRatingsRDD.count(), 3615, "incorrect movieIDsWithRatingsRDD.count() (expected 3615)")
movieIDsWithRatingsTakeOrdered = movieIDsWithRatingsRDD.takeOrdered(3)
# Each entry is (key, iterable of ratings); the iterable is materialised with
# list() so that its length can be checked.
Test.assertTrue(
movieIDsWithRatingsTakeOrdered[0][0] == 1 and len(list(movieIDsWithRatingsTakeOrdered[0][1])) == 993,
"incorrect count of ratings for movieIDsWithRatingsTakeOrdered[0] (expected 993)",
)
Test.assertTrue(
movieIDsWithRatingsTakeOrdered[1][0] == 2 and len(list(movieIDsWithRatingsTakeOrdered[1][1])) == 332,
"incorrect count of ratings for movieIDsWithRatingsTakeOrdered[1] (expected 332)",
)
Test.assertTrue(
movieIDsWithRatingsTakeOrdered[2][0] == 3 and len(list(movieIDsWithRatingsTakeOrdered[2][1])) == 299,
"incorrect count of ratings for movieIDsWithRatingsTakeOrdered[2] (expected 299)",
)
Test.assertEquals(
movieIDsWithAvgRatingsRDD.count(), 3615, "incorrect movieIDsWithAvgRatingsRDD.count() (expected 3615)"
)
Test.assertEquals(
movieIDsWithAvgRatingsRDD.takeOrdered(3),
开发者ID:avenezia,项目名称:CS100.1x-Introduction-to-Big-Data-with-Apache-Spark,代码行数:34,代码来源:lab4_machine_learning_student.py