Shortcut key

ctrl + enter -> run the current statement (from the previous ';' to the next ';')
cmd + left(right) -> jump to head(tail) of the line
alt + left(right) -> jump to head(tail) of the words

1 select | choose the data you need

1) where, limit

-- Pull a two-week window of DAU snapshots, capped at 20 rows.
select *
from processed.fact_dau_snapshot
where date >= '2015-12-15'
  and date <= '2015-12-28'
limit 20;

2) as, in, group by, sum, count, order by, desc

-- Top 20 spenders across the two production apps:
-- total revenue and purchase count per user.
select
    snsid,
    sum(amount) as sum_amount,
    count(1)    as times_bought
from processed.fact_revenue
where app in ('ffs.global.prod', 'ffs.amazon.prod')
group by snsid
order by sum_amount desc
limit 20;

3) *, case, row_number(),

-- First purchase per user (earliest ts), bucketed into payment classes:
--   1 = exactly 0.99, 2 = (0.99, 4.99], 3 = everything else.
-- Fix: in Redshift/Postgres every derived table must carry an alias;
-- the original subqueries had none, so the query would not parse.
select
    snsid,
    amount as first_amount,
    case
        when amount = 0.99 then 1
        when amount > 0.99 and amount <= 4.99 then 2
        else 3
    end as payment_class
from (
    select snsid, amount
    from (
        select *,
               row_number() over (partition by snsid order by ts) as row_num
        from processed.fact_revenue
    ) ranked
    -- keep only each user's earliest purchase
    where row_num = 1
) first_purchase
limit 20;

4) join (left join | right join | full outer join) on

-- Christmas-day DAU broken down by VIP level; left join keeps users
-- even when they have no row in dim_user (vip_level comes back NULL).
select
    vip_level,
    count(dau.snsid)
from (
    select snsid
    from processed.fact_dau_snapshot
    where app = 'ffs.global.prod'
      and date = '2015-12-25'
) dau
left join (
    select snsid, vip_level
    from processed.dim_user
) usr
    on dau.snsid = usr.snsid
group by 1
order by 2 desc;

5) nullif(), trunc(), json_extract_path_text(), json_extract_array_element_text()

-- Daily rc spend per item for wheel2_buy transactions since 2015-12-18.
-- Fixes: derived tables now carry the aliases Redshift requires, and
-- rc_out is cast via nullif(...,'')::int before sum() — summing the raw
-- json text column would fail (and '' would not cast to int).
select
    date,
    item_id,
    item_type,
    count(1),
    sum(nullif(json_extract_path_text(properties, 'rc_out'), '')::int) as rc_out
from (
    select
        properties,
        trunc(ts) as date,
        -- first element of the item_change json array
        json_extract_path_text(json_extract_array_element_text(json_extract_path_text(properties, 'item_change'), 0), 'item_id') as item_id,
        json_extract_path_text(json_extract_array_element_text(json_extract_path_text(properties, 'item_change'), 0), 'type')    as item_type
    from (
        select *,
               json_extract_path_text(properties, 'real_action') as real_action
        from public.events_raw
        where event = 'rc_transaction'
          and ts >= '2015-12-18'
    ) raw
    where real_action = 'wheel2_buy'
) parsed
group by date, item_id, item_type;

6) union | union all, split_part()

-- Frequency of the first two ids in the comma-separated 'ids' property.
-- union all keeps duplicates so the counts stay correct.
-- Fixes: closed the unbalanced derived-table parenthesis (opened after
-- the outer from and never closed), dropped the stray 'select * from'
-- wrapper, added the required alias, and terminated the statement.
select id, count(1)
from (
    select split_part(json_extract_path_text(properties, 'ids'), ',', 1) as id
    from public.events_raw
    where event = 'NewOrderRefresh'
      and trunc(ts) = '2016-01-25'
      and app = 'ffs.global.prod'
    union all
    select split_part(json_extract_path_text(properties, 'ids'), ',', 2) as id
    from public.events_raw
    where event = 'NewOrderRefresh'
      and trunc(ts) = '2016-01-25'
      and app = 'ffs.global.prod'
) ids
group by 1;

7) ts <-> timestamp (convert epoch seconds stored as text into a timestamp)

-- Convert the epoch-seconds json field into a real timestamp:
-- nullif(...,'') guards the ::int cast against empty strings.
select
    timestamp 'epoch'
        + nullif(json_extract_path_text(properties, 'garden_unlock_ts'), '')::int
        * interval '1 second'
from public.events_raw
where event = 'garden_level'
limit 20;

http://www.postgresql.org/docs/9.2/static/functions-datetime.html
http://my.oschina.net/Kenyon/blog/57188

2 export | export data to local

WbExport -type=text
         -delimiter=';'
         -header=true
         -file='/Users/zhisheng/Desktop/mytemp1.txt'
         -table=newtable;
select * from ...;

3 write tables

create table pm.temp1 as
(select * from ...);commit;

4 rc_out report sql script

-- New rc_transaction report sql script
WbExport -type=text
         -delimiter=',' 
         -header=true
         -file='/Users/zhisheng/Desktop/mytemp1.txt'
         -table=newtable;
-- Day-over-day rc spend per (action, field1) for levels 12-27:
-- TODAY = 2015-12-01 vs YESTERDAY = 2015-11-30; 'comments' is the
-- relative change in total rc spent.
-- Fixes vs. original (which did not parse at all):
--   * restored the 'case' keyword dropped before each orphan 'when' list
--   * restored the missing 'from' before the raw-event subquery
--   * closed the unbalanced parentheses and gave every derived table
--     the alias Redshift requires
--   * moved 'where price < 100' into the wrapper select — a select-list
--     alias cannot be referenced in its own where clause
--   * made the price expression identical in both halves
select
    YESTERDAY.action,
    YESTERDAY.field1,
    TODAY.action as action_today,
    sum_rc_today,
    count_rc_today,
    TODAY.price,
    TODAY.field1 as field1_today,
    (sum_rc_today - sum_rc) / (sum_rc * 1.0) as comments
from
(select *
 from
 (select action, field1,
         sum(rc)   as sum_rc_today,
         count(rc) as count_rc_today,
         sum(rc) / (count(rc) * 1.0) as price  -- average rc per transaction
  from
  (select date, snsid, level, nvl(action1, action2) as action, location,
          nvl(nvl(rc_in, rc_out), 0) as rc,
          -- flow: +1 = rc earned, -1 = rc spent, 0 = neither
          case when rc_in > 0 then 1 when rc_out > 0 then -1 else 0 end flow,
          -- subtype indicates item/achievement/quest/gallery - supplements action
          -- to know what field1 and field2 refer to
          case when item != '' then 'item'
               when quest_id != '' then 'quest'
               else NULL end subtype,
          -- field1 indicates item_name/quest_id
          case when item != '' then json_extract_path_text(item, 'item_name')
               when quest_id != '' then quest_id
               when action1 != '' then action1
               else action2 end field1,
          -- field2 provides additional identifiers (e.g. task_id) if necessary
          case when quest_id != '' then nullif(json_extract_path_text(properties, 'task_id'), '')::int
               else NULL end field2,
          -- single out quantity of items if possible
          case when item != '' then nullif(json_extract_path_text(item, 'quantity'), '')::int
               else NULL end quantity
   from
   (select snsid, trunc(ts) as date, properties,
           json_extract_path_text(properties, 'level')::int as level,
           nullif(json_extract_path_text(properties, 'real_action'), '') as action1,
           nullif(json_extract_path_text(properties, 'action'), '') as action2,
           json_extract_array_element_text(json_extract_path_text(properties, 'item_change'), 0) as item,
           json_extract_path_text(properties, 'quest_id') as quest_id,
           json_extract_path_text(properties, 'location') as location,
           nullif(json_extract_path_text(properties, 'rc_out'), '')::int as rc_out,
           nullif(json_extract_path_text(properties, 'rc_in'), '')::int as rc_in
    from public.events_raw
    where snsid not in (select snsid from processed.cheaters)
      and snsid not in (select snsid from processed.cheaters_new)
      and ts >= '2015-12-1 00:00:00' and ts < '2015-12-2 00:00:00'
      and event = 'rc_transaction'
      and app = 'ffs.global.prod') raw
  ) parsed
  where flow = -1 and level >= 12 and level < 28  -- rc spent, mid-level players
  group by action, field1
 ) agg
 where price < 100  -- drop outlier sinks
 order by sum_rc_today desc
 limit 20
) TODAY
full outer join
 -- same pipeline, one day earlier
(select *
 from
 (select action, field1,
         sum(rc)   as sum_rc,
         count(rc) as count_rc,
         sum(rc) / (count(rc) * 1.0) as price
  from
  (select date, snsid, level, nvl(action1, action2) as action, location,
          nvl(nvl(rc_in, rc_out), 0) as rc,
          case when rc_in > 0 then 1 when rc_out > 0 then -1 else 0 end flow,
          -- subtype indicates item/achievement/quest/gallery
          case when item != '' then 'item'
               when quest_id != '' then 'quest'
               else NULL end subtype,
          -- field1 indicates item_name/quest_id
          case when item != '' then json_extract_path_text(item, 'item_name')
               when quest_id != '' then quest_id
               when action1 != '' then action1
               else action2 end field1,
          -- field2 provides additional identifiers (e.g. task_id) if necessary
          case when quest_id != '' then nullif(json_extract_path_text(properties, 'task_id'), '')::int
               else NULL end field2,
          -- single out quantity of items if possible
          case when item != '' then nullif(json_extract_path_text(item, 'quantity'), '')::int
               else NULL end quantity
   from
   (select snsid, trunc(ts) as date, properties,
           json_extract_path_text(properties, 'level')::int as level,
           nullif(json_extract_path_text(properties, 'real_action'), '') as action1,
           nullif(json_extract_path_text(properties, 'action'), '') as action2,
           json_extract_array_element_text(json_extract_path_text(properties, 'item_change'), 0) as item,
           json_extract_path_text(properties, 'quest_id') as quest_id,
           json_extract_path_text(properties, 'location') as location,
           nullif(json_extract_path_text(properties, 'rc_out'), '')::int as rc_out,
           nullif(json_extract_path_text(properties, 'rc_in'), '')::int as rc_in
    from public.events_raw
    where snsid not in (select snsid from processed.cheaters)
      and snsid not in (select snsid from processed.cheaters_new)
      and ts >= '2015-11-30 00:00:00' and ts < '2015-12-1 00:00:00'
      and event = 'rc_transaction'
      and app = 'ffs.global.prod') raw
  ) parsed
  where flow = -1 and level >= 12 and level < 28
  group by action, field1
 ) agg
 where price < 100
 order by sum_rc desc
 limit 20
) YESTERDAY
-- NOTE(review): joining on field1 alone assumes field1 uniquely identifies a
-- row within each side's top 20 — confirm; joining on (action, field1) may
-- be intended.
on TODAY.field1 = YESTERDAY.field1
order by TODAY.sum_rc_today desc;

1) Always write comments; comments go after --
2) Always count before export. 600,000 (60万) rows of data is the top limit
3) Run short script then add script to it
4) Validate data before you use it
5) Try to search before ask; stack overflow helps a lot

http://www.postgresql.org/docs/9.3/static/index.html
http://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html

用于将数据从SQL Server上传到Redshift的Python脚本 该脚本的目的是使从SQL Server将数据上传到Redshift集群的手动任务自动化。 它像 1.使用BCP命令将数据从SQL Server导出到文本文件。 2.使用gzip模块将文本文件压缩为gzip。 3.将压缩文件上传到s3存储桶。 4.截断Redshift的目标表。 5.使用复制命令从压缩文件中加载数据。
示例:select tutor_user_id||student_user_id||'hello' from t1; 4、CHAR_LENGTH 函数等同于len函数,length函数 5、select charindex('fish', 'dogfish'); 返回指定字符串在字
AWS Redshift 中设计表 AWS Redshift 是基于 PostgreSQL 的数据仓库。 那么如何创建一个表, 这个表的创建于普通的关系型数据库的表有什么不同呢? 在创建 Redshift table 时, 可以选择排序键、分配方式和压缩编码。 设计表是为了 减少 I/O 操作数和尽量减少处理查询所需的内存。 Sort key: 排序键将您的数据按照排序顺序存储在磁盘中。Amazon Redshift 查询优化程序在确定最佳查询计划时会使用排序顺序。 如何选择排序键: 根据需要频繁查询的数
https://docs.aws.amazon.com/zh_cn/zh_cn/redshift/latest/dg/c-using-spectrum.html https://docs.aws.amazon.com/zh_cn/zh_cn/athena/latest/ug/what-is.html https://docs.aws.amazon.com/zh_cn/zh_cn/redshift/latest/dg/r_CREATE_EXTERNAL_SCHEMA.html https://docs.aws
在Snowflake DB,Amazon Redshift和PostgreSQL上串行和并行运行SQL脚本的剧本。 与一起使用,以对事件流数据进行基于SQL的计划转换。 假设安装了 , 和 : host > git clone https://github.com/snowplow/sql-runner host > cd sql-runner host > make setup-up # Launches Consul + Postgres for testing host > make # Builds sql-runn
This is the Amazon Redshift Database Developer Guide. Amazon Redshift is an enterprise-level, petabyte scale, fully managed data warehousing service. This guide focuses on using Amazon Redshift to create and manage a data warehouse. If you work with databases as a designer, software developer, or administrator, it gives you the information you need to design, build, query, and maintain your data warehouse.
根据最近一段时间对redshift的研究,发现一些特性比较适合我们当前的业务. 1 比如它的快速恢复能力,因为这一点,我们可以尽量在redshit里面存放一定生命周期的数据,对过期的数据可以做镜像备份,真的需要再从镜像中进行恢复也是非常快的,而且恢复是一键式的,这样我们保持一个较小的高性能的redshit集群就能满足当前的需求,从成本上节约. 2 另外它也很容易扩展,可以从一个 给组spectrum库的只读权限 revoke create on schema spectrum from group data_viewer; grant usage on schema spectrum to group data_viewer; grant selec 1. 下载并安装Redshift插件; 2. 在Cinema 4D中打开Render Settings窗口; 3. 在Render Settings窗口中选择Redshift作为渲染器; 4. 设置渲染参数; 5. 开始渲染。 希望这个回答能帮到您!