/*
Window Functions
Kathi Kellenberger
*/
-- Run the demo against the AdventureWorks2014 sample database.
USE AdventureWorks2014;
GO

-- Suppress the "rows affected" messages and report I/O statistics
-- for every statement that follows.
SET NOCOUNT ON;
SET STATISTICS IO ON;
GO

-- Ranking functions arrived in SQL Server 2005 together with the OVER clause.
-- Number every order across the whole result set, in SalesOrderID order.
SELECT
    soh.SalesOrderID,
    soh.OrderDate,
    soh.CustomerID,
    ROW_NUMBER() OVER (ORDER BY soh.SalesOrderID) AS RowNbr
FROM Sales.SalesOrderHeader AS soh;

-- PARTITION BY restarts the numbering for each customer.
SELECT
    soh.SalesOrderID,
    soh.OrderDate,
    soh.CustomerID,
    ROW_NUMBER() OVER (
        PARTITION BY soh.CustomerID
        ORDER BY soh.SalesOrderID
    ) AS RowNbr
FROM Sales.SalesOrderHeader AS soh;

-- ROW_NUMBER, RANK and DENSE_RANK side by side for a single customer,
-- all three using the same partition and ordering.
SELECT
    soh.SalesOrderID,
    soh.OrderDate,
    soh.CustomerID,
    ROW_NUMBER() OVER (PARTITION BY soh.CustomerID ORDER BY soh.OrderDate) AS RowNum,
    RANK()       OVER (PARTITION BY soh.CustomerID ORDER BY soh.OrderDate) AS Rnk,
    DENSE_RANK() OVER (PARTITION BY soh.CustomerID ORDER BY soh.OrderDate) AS DenseRnk
FROM Sales.SalesOrderHeader AS soh
WHERE soh.CustomerID = 11078;




-- NTILE: rank salespeople into four buckets by their 2013 sales total
-- (bucket 1 = lowest totals) and multiply the bucket number by 1000 as a bonus.
-- BusinessEntityID is part of the grouping so that two salespeople who share
-- a first and last name are reported separately instead of being merged.
SELECT
    SP.FirstName,
    SP.LastName,
    SUM(SOH.TotalDue) AS TotalSales,
    NTILE(4) OVER (ORDER BY SUM(SOH.TotalDue)) * 1000 AS Bonus
FROM Sales.vSalesPerson AS SP
INNER JOIN Sales.SalesOrderHeader AS SOH
    ON SP.BusinessEntityID = SOH.SalesPersonID
-- Half-open date range: all of 2013 regardless of the time-of-day portion.
WHERE SOH.OrderDate >= '2013-01-01'
  AND SOH.OrderDate < '2014-01-01'
GROUP BY SP.BusinessEntityID, SP.FirstName, SP.LastName;


--Interesting application: Remove duplicate rows

-- The statements in this section read from #temp, so build it here.
-- The rows are illustrative sample data containing deliberate duplicates.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp;

CREATE TABLE #temp (
    Col1 INT NOT NULL,
    Col2 INT NOT NULL
);

INSERT INTO #temp (Col1, Col2)
VALUES (1, 1), (1, 1), (2, 2), (2, 2), (2, 2), (3, 3), (4, 4), (4, 4);

-- Number every row across the whole table, in Col1 order.
SELECT Col1, Col2, 
	ROW_NUMBER() OVER(ORDER BY Col1) AS RowNbr
FROM #temp;

-- Restart the numbering for every distinct (Col1, Col2) pair:
-- any row numbered above 1 repeats an earlier row.
SELECT
    t.Col1,
    t.Col2,
    ROW_NUMBER() OVER (
        PARTITION BY t.Col1, t.Col2
        ORDER BY t.Col1
    ) AS RowNbr
FROM #temp AS t;

-- NOTE(review): SQL Server allows window functions only in the SELECT and
-- ORDER BY clauses, so the WHERE clause below makes this statement fail.
-- Presumably it is kept on purpose to motivate the CTE form that follows;
-- confirm that the failure is intentional before running the script end to end.
SELECT Col1, Col2, 
	ROW_NUMBER() OVER(PARTITION BY Col1, Col2 ORDER BY Col1) AS RowNbr
FROM #temp
WHERE ROW_NUMBER() OVER(PARTITION BY Col1, Col2 ORDER BY Col1) <> 1;

-- Compute the per-pair row number inside a CTE so the outer query can
-- filter on it; the result lists only the repeated rows.
;WITH NumberedRows AS (
    SELECT
        t.Col1,
        t.Col2,
        ROW_NUMBER() OVER (
            PARTITION BY t.Col1, t.Col2
            ORDER BY t.Col1
        ) AS RowNbr
    FROM #temp AS t
)
SELECT
    nr.Col1,
    nr.Col2,
    nr.RowNbr
FROM NumberedRows AS nr
WHERE nr.RowNbr <> 1;

-- Same CTE, but delete through it: every row numbered above 1 within its
-- (Col1, Col2) pair is removed from #temp, leaving one row per pair.
;WITH NumberedRows AS (
    SELECT
        t.Col1,
        t.Col2,
        ROW_NUMBER() OVER (
            PARTITION BY t.Col1, t.Col2
            ORDER BY t.Col1
        ) AS RowNbr
    FROM #temp AS t
)
DELETE FROM NumberedRows
WHERE RowNbr <> 1;

-- Show what is left in the table after the delete.
SELECT
    t.Col1,
    t.Col2
FROM #temp AS t;

--The first 4 orders in each year
-- Orders are numbered within their calendar year, earliest first.
-- OrderDate alone is not unique, so SalesOrderID is used as a tie-breaker;
-- without it the pick among orders sharing a date would be arbitrary and
-- could change from one execution to the next.
;WITH Orders AS (
    SELECT
        CustomerID,
        SalesOrderID,
        OrderDate,
        YEAR(OrderDate) AS OrderYear,
        ROW_NUMBER() OVER (
            PARTITION BY YEAR(OrderDate)
            ORDER BY OrderDate, SalesOrderID
        ) AS RowNum
    FROM Sales.SalesOrderHeader
)
SELECT
    CustomerID,
    SalesOrderID,
    OrderDate,
    OrderYear
FROM Orders
WHERE RowNum < 5;

--Islands
-- Sample data: integers with gaps and a few repeated values.
CREATE TABLE #Islands (
    Col1 INT NOT NULL
);

INSERT INTO #Islands (Col1)
VALUES
    (1), (2), (3),
    (5), (6),
    (8), (8), (9), (10), (11), (12), (12),
    (14), (15),
    (18), (19);

-- Show the raw rows.
SELECT Col1
FROM #Islands;
/* results should look like this
1,3
5,6
8,12
14,15
18,19
*/

--Step 1: add Row_Number
-- The computed column is given a name so the result set is self-describing.
SELECT
    Col1,
    ROW_NUMBER() OVER (ORDER BY Col1) AS RowNbr
FROM #Islands;

--since duplicates, change to DENSE_RANK
--Step 2: look at the difference
-- DENSE_RANK gives repeated values the same rank, so Col1 minus the rank
-- stays constant across a run of consecutive values and changes at each gap.
SELECT
    Col1,
    DENSE_RANK() OVER (ORDER BY Col1) AS DenseRnk,
    Col1 - DENSE_RANK() OVER (ORDER BY Col1) AS Grp
FROM #Islands;

--Step 3: Use min and Max
-- Group on (Col1 - DENSE_RANK) and report the smallest and largest value
-- of each group as the two ends of an island.
;WITH Grouped AS (
    SELECT
        i.Col1,
        i.Col1 - DENSE_RANK() OVER (ORDER BY i.Col1) AS IslandKey
    FROM #Islands AS i
)
SELECT
    MIN(g.Col1) AS FirstItem,
    MAX(g.Col1) AS LastItem
FROM Grouped AS g
GROUP BY g.IslandKey;

--END RANKING DEMO








-- Window aggregate functions return an aggregate next to the detail rows,
-- with no GROUP BY query needed.
-- SQL Server 2005 - 2008 R2 support only OVER() and OVER(PARTITION BY ...).

-- Grand total of TotalDue repeated on every order row.
SELECT
    SOH.TotalDue,
    SOH.OrderDate,
    SOH.SalesOrderID,
    SOH.CustomerID,
    SUM(SOH.TotalDue) OVER () AS TotalSales
FROM Sales.SalesOrderHeader AS SOH
ORDER BY SOH.CustomerID;

-- With PARTITION BY the total is computed per customer instead.
SELECT
    SOH.TotalDue,
    SOH.OrderDate,
    SOH.SalesOrderID,
    SOH.CustomerID,
    SUM(SOH.TotalDue) OVER (PARTITION BY SOH.CustomerID) AS TotalCustomerSales
FROM Sales.SalesOrderHeader AS SOH
ORDER BY SOH.CustomerID;


-- The same per-customer total written as a correlated sub-query.
-- A sub-query or CTE actually performs better here, but is harder to write
-- and messier to read.
SELECT
    detail.TotalDue,
    detail.OrderDate,
    detail.SalesOrderID,
    detail.CustomerID,
    (
        SELECT SUM(per_customer.TotalDue)
        FROM Sales.SalesOrderHeader AS per_customer
        WHERE per_customer.CustomerID = detail.CustomerID
    ) AS TotalCustomerSales
FROM Sales.SalesOrderHeader AS detail
ORDER BY detail.CustomerID;




USE AdventureWorks2012;
GO
-- Window aggregates are great for things like this: we want a percentage
-- for each partition of each table. Start with the plain row counts.
-- Only objects that have a partition numbered above 1 are listed, and only
-- their heap or clustered index (index_id < 2).
SELECT
    OBJECT_NAME(p.OBJECT_ID) AS TableName,
    ps.partition_number,
    ps.row_count
FROM sys.data_spaces AS d
JOIN sys.indexes AS i
    ON d.data_space_id = i.data_space_id
JOIN (
        SELECT DISTINCT OBJECT_ID
        FROM sys.partitions
        WHERE partition_number > 1
     ) AS p
    ON i.OBJECT_ID = p.OBJECT_ID
JOIN sys.dm_db_partition_stats AS ps
    ON i.OBJECT_ID = ps.OBJECT_ID
   AND i.index_id = ps.index_id
WHERE i.index_id < 2;

-- Row count per partition plus that partition's share of the table's rows.
-- Only objects that have a partition numbered above 1 are listed, and only
-- their heap or clustered index (index_id < 2).
SELECT OBJECT_NAME(p.OBJECT_ID) TableName,
        ps.partition_number, 
        ps.row_count, 
		-- NULLIF turns a zero table total into NULL, so a partitioned table
		-- with no rows yields a NULL percentage instead of a
		-- divide-by-zero error that would abort the whole query.
		ps.row_count * 100.00 /
		NULLIF(SUM(ps.row_count) OVER(PARTITION BY p.OBJECT_ID), 0)
			AS PercentOfTable
FROM sys.data_spaces  d 
     JOIN sys.indexes i 
     JOIN (SELECT DISTINCT OBJECT_ID
             FROM sys.partitions
             WHERE partition_number > 1) p
           ON i.OBJECT_ID = p.OBJECT_ID
           ON d.data_space_id = i.data_space_id
     JOIN sys.dm_db_partition_stats ps
           ON i.OBJECT_ID = ps.OBJECT_ID and i.index_id = ps.index_id
WHERE i.index_id < 2;


--***********END Window Aggregates



USE AdventureWorks2014;
GO

--Running Totals
-- Index created for the running-total queries in this section.
-- The existence check keeps the script re-runnable when an earlier run
-- stopped before the clean-up at the end dropped the index.
IF NOT EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = N'TEST_2'
      AND object_id = OBJECT_ID(N'Sales.SalesOrderHeader')
)
    CREATE NONCLUSTERED INDEX TEST_2 ON Sales.SalesOrderHeader
        (CustomerID, OrderDate, TotalDue);

-- Old method: a correlated sub-query adds up, for every order, all of the
-- same customer's orders whose SalesOrderID is not greater than its own.
SELECT
    cur.CustomerID,
    cur.SalesOrderID,
    cur.OrderDate,
    cur.TotalDue,
    (
        SELECT SUM(prior.TotalDue)
        FROM Sales.SalesOrderHeader AS prior
        WHERE prior.CustomerID = cur.CustomerID
          AND prior.SalesOrderID <= cur.SalesOrderID
    ) AS RunningTotal
FROM Sales.SalesOrderHeader AS cur
ORDER BY cur.CustomerID, cur.SalesOrderID;


-- Running total per customer using a window aggregate with ORDER BY.
-- No frame is specified, so the default frame applies.
-- The final ORDER BY uses the same keys as the window (CustomerID, then
-- SalesOrderID) so the rows are displayed in the order in which the total
-- accumulates; sorting by OrderDate could show a customer's same-day orders
-- out of sequence relative to the running total.
SELECT
    SalesOrderID,
    OrderDate,
    CustomerID,
    TotalDue,
    SUM(TotalDue) OVER (
        PARTITION BY CustomerID
        ORDER BY SalesOrderID
    ) AS CustomerRunningTotal
FROM Sales.SalesOrderHeader SOH
ORDER BY CustomerID, SalesOrderID;



--END RUNNING TOTAL

/*
By default the frame is RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW

But use ROWS instead

*/

/*
UNBOUNDED PRECEDING
UNBOUNDED FOLLOWING
CURRENT ROW
<integer value> FOLLOWING (ROWS only)
<integer value> PRECEDING (ROWS only)
*/

-- Report I/O statistics so the two framing variants below can be compared.
SET STATISTICS IO ON;

-- ROWS framing: forward running total and reverse (remaining) total.
-- The final ORDER BY matches the window ordering (CustomerID, SalesOrderID)
-- so rows are displayed in the sequence in which the totals accumulate.
SELECT
    OrderDate,
    CustomerID,
    TotalDue,
    SUM(TotalDue) OVER (
        PARTITION BY CustomerID
        ORDER BY SalesOrderID
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS RunningTotal,
    SUM(TotalDue) OVER (
        PARTITION BY CustomerID
        ORDER BY SalesOrderID
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS ReverseTotal
FROM Sales.SalesOrderHeader SOH
ORDER BY CustomerID, SalesOrderID;

-- The same query with RANGE framing, for comparison with the ROWS version.
SELECT
    OrderDate,
    CustomerID,
    TotalDue,
    SUM(TotalDue) OVER (
        PARTITION BY CustomerID
        ORDER BY SalesOrderID
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS RunningTotal,
    SUM(TotalDue) OVER (
        PARTITION BY CustomerID
        ORDER BY SalesOrderID
        RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS ReverseTotal
FROM Sales.SalesOrderHeader SOH
ORDER BY CustomerID, SalesOrderID;


-- A frame can also be a fixed number of rows: here the current order plus
-- the two orders before it for the same customer.
SELECT
    soh.CustomerID,
    soh.OrderDate,
    soh.SalesOrderID,
    soh.TotalDue,
    SUM(soh.TotalDue) OVER (
        PARTITION BY soh.CustomerID
        ORDER BY soh.SalesOrderID
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS Prev2_Orders
FROM Sales.SalesOrderHeader AS soh
ORDER BY soh.CustomerID;



--RANGE IS NOT FULLY IMPLEMENTED AT THIS TIME
--Works almost like ROWS, or may perform worse, so use ROWS
--**********END FRAMING













--LAG AND LEAD
-- (these functions do not accept a window frame)
-- For each order: the customer's previous and next order ids, and the number
-- of days to each of them.
SELECT
    soh.CustomerID,
    soh.SalesOrderID,
    soh.OrderDate,
    LAG(soh.SalesOrderID) OVER (
        PARTITION BY soh.CustomerID ORDER BY soh.SalesOrderID
    ) AS PreviousOrder,
    LEAD(soh.SalesOrderID) OVER (
        PARTITION BY soh.CustomerID ORDER BY soh.SalesOrderID
    ) AS NextOrder,
    DATEDIFF(
        DAY,
        LAG(soh.OrderDate) OVER (
            PARTITION BY soh.CustomerID ORDER BY soh.SalesOrderID
        ),
        soh.OrderDate
    ) AS DaysSinceLast,
    DATEDIFF(
        DAY,
        soh.OrderDate,
        LEAD(soh.OrderDate) OVER (
            PARTITION BY soh.CustomerID ORDER BY soh.SalesOrderID
        )
    ) AS DaysUntilNext
FROM Sales.SalesOrderHeader AS soh
ORDER BY soh.CustomerID, soh.SalesOrderID;




-- LAG also takes an optional offset and a default value: look two orders
-- back, and return '2001-01-01' when there is no such row.
SELECT
    soh.CustomerID,
    soh.SalesOrderID,
    soh.OrderDate,
    LAG(soh.OrderDate, 2, '2001-01-01') OVER (
        PARTITION BY soh.CustomerID
        ORDER BY soh.OrderDate
    ) AS Back2Orders
FROM Sales.SalesOrderHeader AS soh;


--FIRST_VALUE and LAST_VALUE
-- A frame is supported; the default is
-- RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
-- FIRST_VALUE relies on that default, while LAST_VALUE is given an explicit
-- frame that runs from the current row to the end of the partition.
SELECT
    soh.CustomerID,
    soh.OrderDate,
    soh.TotalDue,
    FIRST_VALUE(soh.TotalDue) OVER (
        PARTITION BY soh.CustomerID
        ORDER BY soh.SalesOrderID
    ) AS FirstTotalDue,
    LAST_VALUE(soh.OrderDate) OVER (
        PARTITION BY soh.CustomerID
        ORDER BY soh.OrderDate
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS LastOrderDate
FROM Sales.SalesOrderHeader AS soh;




--PERCENT_RANK and CUME_DIST
--Window frame not supported
/*
PERCENT_RANK = My score is higher than 90% of the scores
CUME_DIST = My score is at 90%
*/
-- Orders per month of 2013, with each month's relative standing by count.
-- The date filter is a half-open range, so orders placed at any time of day
-- on 2013-12-31 are included; BETWEEN ... '2013-12-31' would stop at midnight.
SELECT
    COUNT(*) AS NumberOfOrders,
    MONTH(OrderDate) AS OrderMonth,
    PERCENT_RANK() OVER (ORDER BY COUNT(*)) * 100 AS PercentRank,
    CUME_DIST() OVER (ORDER BY COUNT(*)) * 100 AS CumeDist
FROM Sales.SalesOrderHeader
WHERE OrderDate >= '2013-01-01'
  AND OrderDate < '2014-01-01'
GROUP BY MONTH(OrderDate);



--PERCENTILE_CONT and PERCENTILE_DISC
--Window frame not supported
/*
Find the value at a particular rank. Percentile_Cont will return
a calculated value. Percentile_disc will return an exact value
*/
-- Median (50th percentile) of the monthly order counts for 2013.
-- The date filter is a half-open range, so orders placed at any time of day
-- on 2013-12-31 are included; BETWEEN ... '2013-12-31' would stop at midnight.
SELECT
    COUNT(*) AS NumberOfOrders,
    MONTH(OrderDate) AS orderMonth,
    PERCENTILE_CONT(.5) WITHIN GROUP (ORDER BY COUNT(*))
        OVER () AS PercentileCont,
    PERCENTILE_DISC(.5) WITHIN GROUP (ORDER BY COUNT(*))
        OVER () AS PercentileDisc
FROM Sales.SalesOrderHeader
WHERE OrderDate >= '2013-01-01'
  AND OrderDate < '2014-01-01'
GROUP BY MONTH(OrderDate);
--*****END
--*****END




--Stock analysis
-- Show the raw history first, per ticker in trade-date order.
SELECT *
FROM StockAnalysis.dbo.StockHistory
ORDER BY TickerSymbol, TradeDate;

-- Change in closing price compared with the same ticker's previous row
-- in trade-date order.
SELECT
    sh.TickerSymbol,
    sh.TradeDate,
    sh.ClosePrice,
    sh.ClosePrice - LAG(sh.ClosePrice) OVER (
        PARTITION BY sh.TickerSymbol
        ORDER BY sh.TradeDate
    ) AS ClosePriceDIf
FROM StockAnalysis.dbo.StockHistory AS sh
ORDER BY sh.TickerSymbol, sh.TradeDate;

--clean up
-- Every drop is guarded: TEST_1 is not created anywhere in the visible
-- script, and a partial run may not have created the other objects, so an
-- unconditional DROP would stop the clean-up at the first missing object.
-- Explicit existence checks are used rather than DROP ... IF EXISTS because
-- that syntax requires SQL Server 2016 or later.
IF EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = N'TEST_1'
      AND object_id = OBJECT_ID(N'Sales.SalesOrderHeader')
)
    DROP INDEX TEST_1 ON Sales.SalesOrderHeader;

IF EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = N'TEST_2'
      AND object_id = OBJECT_ID(N'Sales.SalesOrderHeader')
)
    DROP INDEX TEST_2 ON Sales.SalesOrderHeader;

-- Temporary tables live in tempdb, so their existence is checked there.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp;

IF OBJECT_ID('tempdb..#islands') IS NOT NULL
    DROP TABLE #islands;