How can I achieve the same functionality as SQL's "offset" in Hive?
SELECT * from table LIMIT 20 OFFSET 30
Thanks!
How can I achieve the same functionality as SQL's "offset" in Hive?
SELECT * from table LIMIT 20 OFFSET 30
Thanks!
I am unaware of a built-in function or UDF that will mimic this behavior but if you are using HIVE 0.13
you could use the row_number()
function in a round-about way to get the desired result.
select pk, col_1, col_2, ... , col_n
from (
select pk, col_1, col_2, ... , col_n, row_number() OVER (ORDER by pk) as rank
from some_database.some_table
) x
where rank between 31 and 50
Limit works with 2 arguments. Limit (count) and Limit offset,count.
So please use the 2nd option. With
select salary from employee order by salary desc limit 0,1
you will get the highest salary.
Here (Offset) 0 - first row and count (1)
public class CountRatingQueryBuilder {
private static final String SCORING_TABLE_NAME = "web_resource_rating";
private final Connection connection;
private final ScoringMetadata scoringMetadata;
private final SelectSelectStep select;
private final Factory create;
public CountRatingQueryBuilder(Connection connection, ScoringMetadata scoringMetadata){
this.connection = connection;
this.scoringMetadata = scoringMetadata;
create = new Factory(this.connection, SQLDialect.MYSQL);
select = create.select();
withSelectFieldsClause();
}
public CountRatingQueryBuilder withLimit(int limit){
select.limit(limit);
return this;
}
public CountRatingQueryBuilder withRegionId(Integer regionId){
select.where(REGION_ID.field().equal(regionId));
return this;
}
public CountRatingQueryBuilder withResourceTypeId(int resourceTypeId){
select.where(RESOURCE_TYPE_ID.field().equal(resourceTypeId));
return this;
}
public CountRatingQueryBuilder withRequestTimeBetween(long beginTimestamp, long endTimestamp){
select.where(REQUEST_TIME.field().between(beginTimestamp, endTimestamp));
return this;
}
public CountRatingQueryBuilder withResourceId(int resourceId){
select.where(RESOURCE_ID.field().equal(resourceId));
return this;
}
protected void withGroupByClause(){
select.groupBy(REGION_ID.field());
select.groupBy(RESOURCE_TYPE_ID.field());
select.groupBy(RESOURCE_ID.field());
select.groupBy(CONTENT_ID.field());
}
protected void withSelectFieldsClause(){
select.select(REGION_ID.field());
select.select(RESOURCE_TYPE_ID.field());
select.select(CONTENT_ID.field());
select.select(RESOURCE_ID.field());
select.select(Factory.count(HIT_COUNT.field()).as(SUM_HIT_COUNT.fieldName()));
}
protected void withFromClause(){
select.from(SCORING_TABLE_NAME);
}
protected void withOrderByClause(){
select.orderBy(SUM_HIT_COUNT.field().desc());
}
public String build(){
withGroupByClause();
withOrderByClause();
withFromClause();
return select.getSQL().replace("offset ?","");//dirty hack for MySQL dialect. TODO: we can try to implement our own SQL dialect for Hive :)
}
public List<ResultRow> buildAndFetch(){
String sqlWithPlaceholders = build();
List<ResultRow> scoringResults = new ArrayList<ResultRow>(100);
List<Record> recordResults = create.fetch(sqlWithPlaceholders, ArrayUtils.subarray(select.getBindValues().toArray(new Object[select.getBindValues().size()]),0, select.getBindValues().size()-1));//select.fetch();
for(Record record : recordResults){
ResultRowBuilder resultRowBuilder = ResultRowBuilder.create();
resultRowBuilder.withContentType(scoringMetadata.getResourceType(record.getValue(RESOURCE_TYPE_ID.fieldName(), Integer.class)));
resultRowBuilder.withHitCount(record.getValue(SUM_HIT_COUNT.fieldName(), Long.class));
resultRowBuilder.withUrl(record.getValue(CONTENT_ID.fieldName(), String.class));
scoringResults.add(resultRowBuilder.build());
}
return scoringResults;
}
}
Hope this is the right answer which is copied from the following link : please refer jooq extend existing dialect. Adopt MySQL dialect to apache Hive dialect to understand this in detail.