# union(other)
# Combine the elements of this RDD with those of `other` into one RDD.
# Duplicates are retained, as the expected output below shows; here the RDD
# is unioned with itself, so every element appears twice.
# NOTE(review): assumes `sc` is an existing SparkContext created elsewhere
# (it is not defined in this snippet) — confirm.
rdd = sc.parallelize([1, 1, 2, 3])
rdd.union(rdd).collect()
# [1, 1, 2, 3, 1, 1, 2, 3]