# Example: right outer join of two key/value RDDs.
# NOTE(review): `sc` is not defined in this snippet; it is presumably an
# existing SparkContext supplied by the surrounding environment - confirm.
x = sc.parallelize([("a", 1), ("b", 4)])
y = sc.parallelize([("a", 2)])

# `y` is the left side and `x` the right side of the join, so every key of
# `x` shows up in the result; where `y` has no matching key the left value
# is None. The collected pairs are sorted to give a deterministic order.
sorted(y.rightOuterJoin(x).collect())
# [('a', (2, 1)), ('b', (None, 4))]