Offset functionality in Hive

Question

How can I achieve the same functionality as SQL's "offset" in Hive?

SELECT * from table LIMIT 20 OFFSET 30

Thanks!

http://stackoverflow.com/questions/11750312/hive-ql-limiting-number-of-rows-per-each-item — Daniel A. White, Sep 11 '14 at 14:00

o-90 · Answer 1 · 2014-09-12T02:48:39.370

16

I am unaware of a built-in function or UDF that mimics this behavior, but if you are using Hive 0.13 you can use the row_number() window function in a roundabout way to get the desired result.

-- Emulates "LIMIT 20 OFFSET 30": number every row, then keep only the wanted window.
select pk, col_1, col_2, ... , col_n
from (
    -- row_number() numbers the rows 1, 2, 3, ... in pk order
    -- (assumes pk is unique so the numbering is deterministic -- confirm for your table)
    select pk, col_1, col_2, ... , col_n, row_number() OVER (ORDER by pk) as rank
    from some_database.some_table
    ) x
-- positions 31..50 inclusive: skip the first 30 rows, return the next 20
where rank between 31 and 50

edited Sep 12 '14 at 02:48

answered Sep 12 '14 at 02:35

o-90

17,045
10
39
63

score 1 · Answer 2 · edited Jan 27 '20 at 14:16

1

LIMIT accepts either one argument or two: `LIMIT count` or `LIMIT offset, count`.

So use the second form. For example, with

select salary from employee order by salary desc limit 0,1

you will get the highest salary.

Here the offset (0) means "start at the first row" and the count (1) means "return one row".

edited Jan 27 '20 at 14:16

Gunnar Bernstein

6,074
2
45
67

answered Jan 27 '20 at 13:53

user12791839

11
1

score 0 · Answer 3 · edited May 23 '17 at 11:45

/**
 * Fluent builder for the hit-count rating query over the
 * {@code web_resource_rating} table.
 *
 * <p>The query is assembled with jOOQ's MySQL dialect and the rendered SQL is
 * then post-processed so that the {@code offset ?} placeholder emitted for a
 * LIMIT clause is removed (see {@link #build()}).
 *
 * <p>Instances are mutable and hold a single underlying select; every
 * {@code withXxx} call adds to that same select.
 */
public class CountRatingQueryBuilder {

    private static final String SCORING_TABLE_NAME = "web_resource_rating";

    /** Placeholder text that {@link #build()} strips from the rendered SQL. */
    private static final String OFFSET_PLACEHOLDER = "offset ?";

    private final Connection connection;
    private final ScoringMetadata scoringMetadata;

    private final SelectSelectStep select;
    private final Factory create;

    /**
     * Set once GROUP BY / ORDER BY / FROM have been appended, so that repeated
     * calls to {@link #build()} do not append them a second time.
     */
    private boolean structuralClausesApplied;

    /**
     * Creates a builder bound to the given connection and pre-populates the
     * SELECT field list.
     *
     * @param connection      connection handed to the jOOQ {@code Factory}
     * @param scoringMetadata used to resolve resource types when mapping rows
     */
    public CountRatingQueryBuilder(Connection connection, ScoringMetadata scoringMetadata){
        this.connection = connection;
        this.scoringMetadata = scoringMetadata;

        create = new Factory(this.connection, SQLDialect.MYSQL);
        select = create.select();

        // NOTE(review): this calls a protected (overridable) method from the
        // constructor; a subclass override would run before the subclass is
        // fully initialised. Kept as-is to preserve existing behaviour.
        withSelectFieldsClause();
    }

    /**
     * Restricts the number of rows returned.
     *
     * @param limit maximum number of rows
     * @return this builder
     */
    public CountRatingQueryBuilder withLimit(int limit){
        select.limit(limit);
        return this;
    }

    /**
     * Adds a {@code REGION_ID = regionId} condition.
     *
     * @param regionId region to match
     * @return this builder
     */
    public CountRatingQueryBuilder withRegionId(Integer regionId){
        select.where(REGION_ID.field().equal(regionId));
        return this;
    }

    /**
     * Adds a {@code RESOURCE_TYPE_ID = resourceTypeId} condition.
     *
     * @param resourceTypeId resource type to match
     * @return this builder
     */
    public CountRatingQueryBuilder withResourceTypeId(int resourceTypeId){
        select.where(RESOURCE_TYPE_ID.field().equal(resourceTypeId));
        return this;
    }

    /**
     * Adds a {@code REQUEST_TIME BETWEEN beginTimestamp AND endTimestamp} condition.
     *
     * @param beginTimestamp lower bound passed to {@code between}
     * @param endTimestamp   upper bound passed to {@code between}
     * @return this builder
     */
    public CountRatingQueryBuilder withRequestTimeBetween(long beginTimestamp, long endTimestamp){
        select.where(REQUEST_TIME.field().between(beginTimestamp, endTimestamp));
        return this;
    }

    /**
     * Adds a {@code RESOURCE_ID = resourceId} condition.
     *
     * @param resourceId resource to match
     * @return this builder
     */
    public CountRatingQueryBuilder withResourceId(int resourceId){
        select.where(RESOURCE_ID.field().equal(resourceId));
        return this;
    }

    /** Appends the GROUP BY columns: region, resource type, resource and content. */
    protected void withGroupByClause(){
        select.groupBy(REGION_ID.field());
        select.groupBy(RESOURCE_TYPE_ID.field());
        select.groupBy(RESOURCE_ID.field());
        select.groupBy(CONTENT_ID.field());
    }

    /** Appends the selected columns plus the aggregated hit-count column. */
    protected void withSelectFieldsClause(){
        select.select(REGION_ID.field());
        select.select(RESOURCE_TYPE_ID.field());
        select.select(CONTENT_ID.field());
        select.select(RESOURCE_ID.field());
        // NOTE(review): the alias is SUM_HIT_COUNT but the aggregate is count(...);
        // confirm whether sum(...) was intended.
        select.select(Factory.count(HIT_COUNT.field()).as(SUM_HIT_COUNT.fieldName()));
    }

    /** Appends the FROM clause targeting the scoring table. */
    protected void withFromClause(){
        select.from(SCORING_TABLE_NAME);
    }

    /** Orders the result by the aggregated hit-count column, descending. */
    protected void withOrderByClause(){
        select.orderBy(SUM_HIT_COUNT.field().desc());
    }

    /**
     * Renders the query as SQL with bind placeholders.
     *
     * <p>GROUP BY, ORDER BY and FROM are appended on the first call only, so
     * calling this method more than once (or calling it before
     * {@link #buildAndFetch()}) no longer duplicates those clauses.
     *
     * @return the rendered SQL with any {@code offset ?} placeholder removed
     */
    public String build(){
        if (!structuralClausesApplied) {
            withGroupByClause();
            withOrderByClause();
            withFromClause();
            structuralClausesApplied = true;
        }
        //dirty hack for MySQL dialect. TODO: we can try to implement our own SQL dialect for Hive :)
        return select.getSQL().replace(OFFSET_PLACEHOLDER, "");
    }

    /**
     * Builds the query, executes it and maps every record to a {@code ResultRow}.
     *
     * @return one {@code ResultRow} per fetched record, in result order
     */
    public List<ResultRow> buildAndFetch(){
        String sqlWithPlaceholders = build();

        Object[] allBindValues = select.getBindValues().toArray();
        // build() removes the "offset ?" placeholder, so its bind value (the
        // last one) must be removed too -- but only when that placeholder was
        // actually rendered. Previously the last value was dropped
        // unconditionally, which discarded a real WHERE bind whenever
        // withLimit() had not been called.
        boolean offsetStripped = select.getSQL().contains(OFFSET_PLACEHOLDER);
        int usableCount = offsetStripped ? allBindValues.length - 1 : allBindValues.length;
        Object[] bindValues = ArrayUtils.subarray(allBindValues, 0, usableCount);

        List<Record> recordResults = create.fetch(sqlWithPlaceholders, bindValues);
        List<ResultRow> scoringResults = new ArrayList<ResultRow>(recordResults.size());
        for(Record record : recordResults){
            ResultRowBuilder resultRowBuilder = ResultRowBuilder.create();

            resultRowBuilder.withContentType(scoringMetadata.getResourceType(record.getValue(RESOURCE_TYPE_ID.fieldName(), Integer.class)));
            resultRowBuilder.withHitCount(record.getValue(SUM_HIT_COUNT.fieldName(), Long.class));
            resultRowBuilder.withUrl(record.getValue(CONTENT_ID.fieldName(), String.class));
            scoringResults.add(resultRowBuilder.build());
        }
        return scoringResults;
    }

}

I hope this answers the question. The code above is copied from the linked question "jOOQ: extend existing dialect. Adopt MySQL dialect to Apache Hive dialect"; please refer to it for a detailed explanation.

Offset functionality in Hive

3 Answers