scala> val df = sc.parallelize(Seq(
| (0,"cat26","cat26"),
| (1,"cat67","cat26"),
| (2,"cat56","cat26"),
| (3,"cat8","cat26"))).toDF("Hour", "Category", "Value")
scala> df.na.replace("*", Map[Any, Any](
| "cat26" -> "cat23"
| )).show()
+----+--------+-----+
|Hour|Category|Value|
+----+--------+-----+
| 0| cat23|cat23|
| 1| cat67|cat23|
| 2| cat56|cat23|
| 3| cat8|cat23|
+----+--------+-----+
Official Spark source example: org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
Here "name" is the column name:
df.na.replace("name", Map(
"Bob" -> "Bravo",
"Alice" -> null
df.na.replace("*", Map[Any, Any](
false -> null
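A self-contained sketch of the same idea (the people DataFrame and its "name"/"age" columns are invented here for illustration; in spark-shell the toDF implicits are already in scope):

// Hypothetical data, only to make the snippet runnable on its own.
val people = Seq(("Bob", 16), ("Alice", 30), ("Nick", 61)).toDF("name", "age")

// Map "Bob" to "Bravo" and "Alice" to null in the "name" column.
people.na.replace("name", Map(
  "Bob" -> "Bravo",
  "Alice" -> null)).show()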
Replace 0 with 9 in the Hour column (the lookup "hour" still matches because column resolution is case-insensitive by default):
scala> import com.google.common.collect.ImmutableMap
scala> df.na.replace("hour", ImmutableMap.of(0, 9)).show()
+----+--------+-----+
|Hour|Category|Value|
+----+--------+-----+
| 9| cat26|cat26|
| 1| cat67|cat26|
| 2| cat56|cat26|
| 3| cat8|cat26|
+----+--------+-----+
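The Guava ImmutableMap is only needed for the Java-friendly overload; the Scala overload accepts an ordinary Scala Map, so the same replacement can be written against the df defined above (a sketch) as:

// Same replacement using the Scala Map overload; no Guava import required.
df.na.replace("Hour", Map(0 -> 9)).show()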
Replace "cat26" with "cat222" in all columns:
scala> df.na.replace("*", ImmutableMap.of("cat26", "cat222")).show()
+----+--------+------+
|Hour|Category| Value|
+----+--------+------+
| 0| cat222|cat222|
| 1| cat67|cat222|
| 2| cat56|cat222|
| 3| cat8|cat222|
+----+--------+------+
Official Spark source example: org/apache/spark/sql/DataFrameNaFunctions.scala
* {{{
* import com.google.common.collect.ImmutableMap;
* // Replaces all occurrences of 1.0 with 2.0 in column "height".
* df.na.replace("height", ImmutableMap.of(1.0, 2.0));
* // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name".
* df.na.replace("name", ImmutableMap.of("UNKNOWN", "unnamed"));
* // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns.
* df.na.replace("*", ImmutableMap.of("UNKNOWN", "unnamed"));
* }}}
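For comparison, the same three operations written with the Scala Map overload (a sketch; the column names "height" and "name" are taken from the quoted doc comment, not from the df built earlier):

// Replaces all occurrences of 1.0 with 2.0 in column "height".
df.na.replace("height", Map(1.0 -> 2.0))
// Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name".
df.na.replace("name", Map("UNKNOWN" -> "unnamed"))
// Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns.
df.na.replace("*", Map("UNKNOWN" -> "unnamed"))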