// Partitioned JDBC read of the table named by `tbname` from Oracle.
//
// The table is wrapped in a subquery that exposes Oracle's ROWNUM pseudo-column
// as `rownum__rn`, which is then used as the numeric partition column. The
// helper column is dropped from the resulting DataFrame after loading.
val df = {
  // Connection-level settings: driver, URL, and credentials.
  val connected = spark.read
    .format("jdbc")
    .option("driver", "oracle.jdbc.driver.OracleDriver")
    .option("url", "jdbc:oracle:thin:@10.18.2.3:1521:dbname")
    .option("user", "***")
    .option("password", "***")

  // NOTE(review): without an ORDER BY, ROWNUM numbering is presumably not
  // guaranteed to be identical across the separate per-partition queries;
  // confirm that rows cannot be duplicated or skipped for this table.
  val numberedTable = s"(select t.*, ROWNUM rownum__rn from ${tbname} t) b"

  // lowerBound/upperBound only determine the partition stride; they do not
  // filter rows (the first and last partitions are open-ended).
  val partitioned = connected
    .option("dbtable", numberedTable)
    .option("fetchsize", 100000)
    .option("partitionColumn", "rownum__rn")
    .option("lowerBound", 1)
    .option("upperBound", 4000000)
    .option("numPartitions", 2)

  // Load, then remove the synthetic partition column.
  partitioned.load().drop("rownum__rn")
}
伪代码,仅帮助理解:
if 指定了 partitionColumn 但 lowerBound、upperBound、numPartitions 中有任意一项未指定(或未指定 partitionColumn 却指定了 lowerBound / upperBound),报错
if numPartitions <= 1 || lowerBound == upperBound 忽略这些选项,直接读取,返回一个分区
if numPartitions > 1 && lowerBound > upperBound 报错
numPartitions = min(upperBound - lowerBound, numPartitions)
if numPartitions == 1 同情况二
else 返回numPartitions个分区
delta = (upperBound - lowerBound) / numPartitions
分区1数据条件:partitionColumn < lowerBound + delta || partitionColumn is null
分区2数据条件:partitionColumn >= lowerBound + delta && partitionColumn < lowerBound + 2 * delta
...
最后分区数据条件:partitionColumn >= lowerBound + (numPartitions - 1) * delta