ODPS
Max Compute/ODPS
DDL
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[STORED BY StorageHandler] -- 仅限外部表
[WITH SERDEPROPERTIES (Options)] -- 仅限外部表
[LOCATION OSSLocation];-- 仅限外部表
[LIFECYCLE days]
[AS select_statement]
create table if not exists sale_detail
(
shop_name string,
customer_id string,
total_price double
)
partitioned by (sale_date string,region string);
-- 创建一张分区表sale_detail
CREATE TABLE [IF NOT EXISTS] table_name
LIKE existing_table_name
create table sale_detail_ctas1 as
select * from sale_detail;
在_c5
、_c6
。
--- 删除表
DROP TABLE [IF EXISTS] table_name;
--- 重命名表
ALTER TABLE table_name RENAME TO new_table_name;
Select | 查询
Join
--- 左连接
select a.shop_name as ashop, b.shop_name as bshop from shop a
left outer join sale_detail b on a.shop_name=b.shop_name;
-- 由于表shop及sale_detail中都有shop_name列,因此需要在select子句中使用别名进行区分。
--- 右连接
select a.shop_name as ashop, b.shop_name as bshop from shop a
right outer join sale_detail b on a.shop_name=b.shop_name;
--- 全连接
select a.shop_name as ashop, b.shop_name as bshop from shop a
full outer join sale_detail b on a.shop_name=b.shop_name;
连接条件,只允许
Map Join
当一个大表和一个或多个小表做

-
通过
MapReduce Local Task ,将小表读入内存,生成HashTableFiles 上传至Distributed Cache 中,这里会对HashTableFiles 进行压缩。 -
MapReduce Job 在Map 阶段,每个Mapper 从Distributed Cache 读取HashTableFiles 到内存中,顺序扫描大表,在Map 阶段直接进行Join ,将数据传递给下一个MapReduce 任务。
select /* + mapjoin(a) */
a.shop_name,
b.customer_id,
b.total_price
from shop a join sale_detail b
on a.shop_name = b.shop_name;
Subquery | 子查询
在
create table shop as select * from sale_detail;
--- 子查询作为表
select a.shop_name, a.customer_id, a.total_price from
(select * from shop) a join sale_detail on a.shop_name = sale_detail.shop_name;
--- IN SUBQUERY / NOT IN SUBQUERY
SELECT * from mytable1 where id in (select id from mytable2);
--- 等效于
SELECT * from mytable1 a LEFT SEMI JOIN mytable2 b on a.id=b.id;
--- EXISTS SUBQUERY/NOT EXISTS SUBQUERY
SELECT * from mytable1 where not exists (select * from mytable2 where id = mytable1.id);
--- 等效于
SELECT * from mytable1 a LEFT ANTI JOIN mytable2 b on a.id=b.id;
--- SCALAR SUBQUERY
select * from t1 where (select count(*) from t2 where t1.a = t2.a) > 1;
-- 等效于
select t1.* from t1 left semi join (select a, count(*) from t2 group by a having count(*) > 1) t2 on t1 .a = t2.a;
UDF
package org.alidata.odps.udf.examples;
import com.aliyun.odps.udf.UDF;
public final class Lower extends UDF {
public String evaluate(String s) {
if (s == null) {
return null;
}
return s.toLowerCase();
}
}