Aktaruzzaman Aman

21
reputation
3

My work sample

  1. Data cleaning and normalizing

%%sql SELECT DISTINCT breed, TRIM(LEADING '-' FROM breed) AS breed_fixed FROM dogs ORDER BY breed_fixed

  1. DISTINCT, and ORDER BY in your own queries

%%sql select user_guid, membership_type, state, country, created_at from users where membership_type = 2 and state = "NC" and country = "us" and created_at > '2014-3-1' order by created_at desc;

  1. Summarizing Data

%%sql select gender, breed_group, count(distinct dog_guid) as num_dogs from dogs where breed_group !="" and breed_group is not null and breed_group !='none' group by 1, 2 having num_dogs >=1000 order by 3 desc

%%sql select test_name, avg(timestampdiff(hour, start_time, end_time)) as duration from exam_answers where timestampdiff(hour, start_time, end_time)<= 6000 group by 1 having avg(timestampdiff(second, start_time, end_time))>0 order by duration desc

%%sql select state, zip, count(distinct user_guid) as num_uguid from users where country ='us' group by state, zip having num_uguid>5 order by state asc, num_uguid desc

%%sql select test_name, step_type, start_time, end_time, timestampdiff(minute, start_time, end_time) as duration from exam_answers where timestampdiff(minute, start_time, end_time)<0 order by timestampdiff(minute, start_time, end_time) asc

  1. Subqueries_and_Derived_Tables

%%sql select count(distinct u.user_guid) from users u where not exists (select * from dogs d where u.user_guid = d.user_guid and d.user_guid is not null)

%%sql SELECT du.user_guid AS uUserID, d.user_guid AS dUserID FROM (select distinct user_guid from users) du LEFT JOIN dogs d ON du.user_guid=d.user_guid where du.user_guid is not null

%%sql SELECT DistinctUUsersID.user_guid AS uUserID, ddUserID.user_guid AS dUserID, count(*) AS numrows FROM (SELECT DISTINCT u.user_guid FROM users u limit 100 ) AS DistinctUUsersID LEFT JOIN (select distinct d.user_guid from dogs d ) as ddUserID ON DistinctUUsersID.user_guid=ddUserID.user_guid

GROUP BY DistinctUUsersID.user_guid ORDER BY numrows DESC;

%%sql select g.dim, avg(g.nTest) as avg_test from(select d.dog_guid as DogID, d.dimension as dim, count(c.test_name) as nTest from dogs d join complete_tests c on d.dog_guid = c.dog_guid
group by d.dog_guid, d.dimension) as g

group by g.dim

  1. Useful_Logical_Functions

%%sql SELECT CASE cleaned_users.country WHEN "US" THEN "In US" WHEN "N/A" THEN "Not Applicable" ELSE "Outside US" END AS US_user, count(cleaned_users.user_guid)
FROM (SELECT DISTINCT user_guid, country FROM users WHERE country IS NOT NULL) AS cleaned_users GROUP BY US_user