{"id":30119704,"url":"https://github.com/kverma1502/sql_data_analyst_task_for_dcluttr","last_synced_at":"2025-08-10T12:35:07.819Z","repository":{"id":309004182,"uuid":"1034842247","full_name":"kverma1502/SQL_Data_Analyst_Task_For_Dcluttr","owner":"kverma1502","description":"The task was given to assess my understanding of writing complex SQL queries as a recruitment process for data analyst position in Dcluttr.","archived":false,"fork":false,"pushed_at":"2025-08-09T06:23:49.000Z","size":263,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":0,"default_branch":"main","last_synced_at":"2025-08-09T08:17:40.746Z","etag":null,"topics":["ms-sql-server","ms-sql-server-managment-studio","sql","ssms"],"latest_commit_sha":null,"homepage":"","language":null,"has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/kverma1502.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2025-08-09T05:03:00.000Z","updated_at":"2025-08-09T06:47:14.000Z","dependencies_parsed_at":"2025-08-09T08:17:43.178Z","dependency_job_id":"47a58d91-2aaa-4345-b25a-823300334c7b","html_url":"https://github.com/kverma1502/SQL_Data_Analyst_Task_For_Dcluttr","commit_stats":null,"previous_names":["kverma1502/sql_data_analyst_task_for_dcluttr"],"tags_count":null,"template":false,"template_full_name":null,"purl":"pkg:github/kverma1502/SQL_Data_Analyst_Task_For_Dcluttr","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/kverma1502%2FSQL_Data_Analyst_Task_For_Dcluttr","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/kverma1502%2FSQL_Data_Analyst_Task_For_Dcluttr/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/kverma1502%2FSQL_Data_Analyst_Task_For_Dcluttr/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/kverma1502%2FSQL_Data_Analyst_Task_For_Dcluttr/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/kverma1502","download_url":"https://codeload.github.com/kverma1502/SQL_Data_Analyst_Task_For_Dcluttr/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/kverma1502%2FSQL_Data_Analyst_Task_For_Dcluttr/sbom","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":269725724,"owners_count":24465365,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-08-10T02:00:08.965Z","response_time":71,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["ms-sql-server","ms-sql-server-managment-studio","sql","ssms"],"created_at":"2025-08-10T12:35:04.799Z","updated_at":"2025-08-10T12:35:07.807Z","avatar_url":"https://github.com/kverma1502.png","language":null,"funding_links":[],"categories":[],"sub_categories":[],"readme":"/*\nInput files used for completing the task:\nhttps://drive.google.com/drive/folders/13Ra1PQuOT5Nv5HVb2ygrW2QAKBsqv6uY\n*/\n\n--Using MS-SQL Server (SSMS tool)\n\n--TASK_1:\ncreate database dcluttr_task\n\nBULK INSERT staging_blinkit\nFROM 'D:\\Dcluttr_task\\all_blinkit_city_map.csv'\nWITH (\n    FIRSTROW = 2,\n    FIELDTERMINATOR = ',',\n    ROWTERMINATOR = '\\r\\n'\n);\n\nBULK INSERT staging_blinkit\nFROM 'D:\\Dcluttr_task\\all_blinkit_categories.csv'\nWITH (\n    FIRSTROW = 2,\n    FIELDTERMINATOR = ',',\n    ROWTERMINATOR = '\\r\\n'\n);\n\n\nBULK INSERT staging_blinkit\nFROM 'D:\\Dcluttr_task\\all_blinkit_category_scraping_stream.csv'\nWITH (\n    FIRSTROW = 2,\n    FIELDTERMINATOR = ',',\n    ROWTERMINATOR = '\\r\\n'\n);\n\n\n--#TASK_2:\n--Creating a required table\n\nCREATE TABLE blinkit_city_insights (\n    date                DATE,\n    sku_id              INT,\n    city_name           VARCHAR(100),\n    brand_id            INT,\n    brand               VARCHAR(100),\n    image_url           VARCHAR(MAX),\n    sku_name            VARCHAR(255),\n    category_id         INT,\n    category_name       VARCHAR(100),\n    sub_category_id     INT,\n    sub_category_name   VARCHAR(100),\n    est_qty_sold        INT,\n    est_sales_sp        DECIMAL(18,2),\n    est_sales_mrp       DECIMAL(18,2),\n    listed_ds_count     INT,\n    ds_count            INT,\n    wt_osa              FLOAT,\n    wt_osa_ls           FLOAT,\n    mrp                 DECIMAL(10,2),\n    sp                  DECIMAL(10,2),\n    discount            FLOAT,\n    PRIMARY KEY (date, sku_id, city_name)\n);\n\n\n-- Step 1: CTE to calculate inventory movement\n\nWITH inv_movement AS (\n    SELECT\n        s.store_id,\n        s.sku_id,\n        cm.city_name,\n        s.created_at AS curr_time,\n        s.inventory AS current_inventory,\n        LEAD(s.inventory) OVER (PARTITION BY s.sku_id, s.store_id ORDER BY s.created_at) AS next_inventory,\n        LEAD(s.created_at) OVER (PARTITION BY s.sku_id, s.store_id ORDER BY s.created_at) AS next_time,\n        DATEDIFF(HOUR, s.created_at, LEAD(s.created_at) OVER (PARTITION BY s.sku_id, s.store_id ORDER BY s.created_at)) AS hour_diff,\n        c.l1_category_id AS category_id,\n        c.l1_category AS category_name,\n        c.l2_category_id AS sub_category_id,\n        c.l2_categor AS sub_category_name,\n        s.sku_name,\n        s.brand,\n        s.brand_id,\n        s.image_url,\n        s.selling_price,\n        s.mrp\n    FROM all_blinkit_category_scraping_stream s\n    JOIN blinkit_categories c ON s.l2_category_id = c.l2_category_id\n    JOIN blinkit_city_map cm ON s.store_id = cm.store_id\n),\nestimated_sales AS (\n    SELECT\n        *,\n        CASE \n            WHEN next_inventory IS NULL THEN 0\n            WHEN current_inventory \u003e next_inventory THEN current_inventory - next_inventory\n            ELSE 0\n        END AS est_sold_units,\n        CAST(curr_time AS DATE) AS date\n    FROM inv_movement\n),\n\n-- Step 2: Aggregate estimated sales per sku-city-date\n\nsku_city_daily_sales AS (\n    SELECT\n        date,\n        sku_id,\n        city_name,\n        MIN(sku_name) AS sku_name,\n        MIN(brand_id) AS brand_id,\n        MIN(brand) AS brand,\n        MIN(image_url) AS image_url,\n        MIN(category_id) AS category_id,\n        MIN(category_name) AS category_name,\n        MIN(sub_category_id) AS sub_category_id,\n        MIN(sub_category_name) AS sub_category_name,\n        SUM(est_sold_units) AS est_qty_sold\n    FROM estimated_sales\n    GROUP BY date, sku_id, city_name\n),\n\n-- Step 3: Mode logic for selling_price\n\nprice_mode AS (\n    SELECT\n        date,\n        sku_id,\n        city_name,\n        selling_price AS sp,\n        RANK() OVER (PARTITION BY date, sku_id, city_name ORDER BY COUNT(*) DESC) AS rnk\n    FROM estimated_sales\n    GROUP BY date, sku_id, city_name, selling_price\n),\nmrp_mode AS (\n    SELECT\n        date,\n        sku_id,\n        city_name,\n        mrp,\n        RANK() OVER (PARTITION BY date, sku_id, city_name ORDER BY COUNT(*) DESC) AS rnk\n    FROM estimated_sales\n    GROUP BY date, sku_id, city_name, mrp\n),\n\n-- Step 4: Dark store count info\n\nlisted_ds_count_cte AS (\n    SELECT \n        CAST(s.created_at AS DATE) AS date,\n        s.sku_id,\n        cm.city_name,\n        COUNT(DISTINCT s.store_id) AS listed_ds_count\n    FROM all_blinkit_category_scraping_stream s\n    JOIN blinkit_city_map cm ON s.store_id = cm.store_id\n    GROUP BY CAST(s.created_at AS DATE), s.sku_id, cm.city_name\n),\nds_count_cte AS (\n    SELECT COUNT(DISTINCT store_id) AS ds_count\n    FROM all_blinkit_category_scraping_stream\n),\nin_stock_cte AS (\n    SELECT\n        CAST(s.created_at AS DATE) AS date,\n        s.sku_id,\n        cm.city_name,\n        COUNT(DISTINCT CASE WHEN s.inventory \u003e 0 THEN s.store_id END) AS in_stock_store_count\n    FROM all_blinkit_category_scraping_stream s\n    JOIN blinkit_city_map cm ON s.store_id = cm.store_id\n    GROUP BY CAST(s.created_at AS DATE), s.sku_id, cm.city_name\n)\n\n-- Step 5: Final Insert\n\nINSERT INTO blinkit_city_insights (\n    date, sku_id, city_name, brand_id, brand, image_url, sku_name,\n    category_id, category_name, sub_category_id, sub_category_name,\n    est_qty_sold, est_sales_sp, est_sales_mrp,\n    listed_ds_count, ds_count, wt_osa, wt_osa_ls,\n    mrp, sp, discount\n)\nSELECT\n    s.date,\n    s.sku_id,\n    s.city_name,\n    s.brand_id,\n    s.brand,\n    s.image_url,\n    s.sku_name,\n    s.category_id,\n    s.category_name,\n    s.sub_category_id,\n    s.sub_category_name,\n    s.est_qty_sold,\n    s.est_qty_sold * ISNULL(p.sp, 0) AS est_sales_sp,\n    s.est_qty_sold * ISNULL(m.mrp, 0) AS est_sales_mrp,\n    ISNULL(l.listed_ds_count, 0),\n    d.ds_count,\n    CAST(1.0 * ISNULL(i.in_stock_store_count, 0) / NULLIF(d.ds_count, 0) AS FLOAT) AS wt_osa,\n    CAST(1.0 * ISNULL(i.in_stock_store_count, 0) / NULLIF(l.listed_ds_count, 0) AS FLOAT) AS wt_osa_ls,\n    m.mrp,\n    p.sp,\n    CAST(1.0 * (ISNULL(m.mrp, 0) - ISNULL(p.sp, 0)) / NULLIF(m.mrp, 0) AS FLOAT) AS discount\nFROM sku_city_daily_sales s\nLEFT JOIN price_mode p ON s.date = p.date AND s.sku_id = p.sku_id AND s.city_name = p.city_name AND p.rnk = 1\nLEFT JOIN mrp_mode m ON s.date = m.date AND s.sku_id = m.sku_id AND s.city_name = m.city_name AND m.rnk = 1\nLEFT JOIN listed_ds_count_cte l ON s.date = l.date AND s.sku_id = l.sku_id AND s.city_name = l.city_name\nLEFT JOIN in_stock_cte i ON s.date = i.date AND s.sku_id = i.sku_id AND s.city_name = i.city_name\nCROSS JOIN ds_count_cte d;\n\n/*\nSHORT EXPLANATION:\nLEAD() fetches the next row's inventory and created_at values (for the same sku_id and store_id ordered by time), enabling comparison across time slots.\nSales Calculation Logic: If next_inventory \u003c current_inventory, the difference is treated as est_sold_units (i.e., estimated quantity sold in that time interval).\n*/\n\nselect * from blinkit_city_insights;\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fkverma1502%2Fsql_data_analyst_task_for_dcluttr","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fkverma1502%2Fsql_data_analyst_task_for_dcluttr","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fkverma1502%2Fsql_data_analyst_task_for_dcluttr/lists"}