Monday, January 25, 2010

Example Code to build and populate a Date Dimension


OK, I've found myself searching for this code a half dozen times over as many weeks. That tells me that I :
a) Need to keep better track of my source code
b) Need to share it on my blog


Caveats & Comments: 

  • This is "warts and all" utility code. It could be optimized, but for the few rows that a Date Dimension represents, why?
  • One of the few cases where Dr. Kimball and his recommends meaningful surrogate keys is in the case of the date dimension. Our keys here use a YYYYMMDD or YYYYQQ format. 
  • This builds a Date Snowflake, which provides correctly ordered keys at any level in the dimension. 
  • I usually rejoin these tables in either the DSV or a SQL View to create the structure SSAS actually consumes.
  • There is a stub for a Week Hierarchy, which sometimes is called for, and sometimes not.
  • This was built in and is compatible with SQL 2005. It runs in SQL 2008 as well. In 2008, you could change the date type from DateTime to Date and save yourself some formatting overhead down the road.









/* DDL Begin */
/****** Object:  Table [dbo].[dim_CalendarYear]    Script Date: 11/02/2009 12:34:56 ******/
CREATE TABLE [dbo].[dim_CalendarYear](
      [CalendarYearID] [int] NOT NULL,
      [CalendarYearName] [varchar](10) NULL,
 CONSTRAINT [PK_dim_CalendarYear] PRIMARY KEY CLUSTERED
(
      [CalendarYearID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]
GO

/****** Object:  Table [dbo].[dim_CalendarQuarter]    Script Date: 11/02/2009 12:34:56 ******/
CREATE TABLE [dbo].[dim_CalendarQuarter](
      [CalendarQuarterID] [int] NOT NULL,
      [CalendarQuarterNumber] [int] NULL,
      [CalendarQuarterName] [varchar](10) NULL,
      [CalendarYearID] [int] NULL,
 CONSTRAINT [PK_dim_CalendarQuarter] PRIMARY KEY CLUSTERED
(
      [CalendarQuarterID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]
GO

/****** Object:  Table [dbo].[dim_CalendarMonth]    Script Date: 11/02/2009 12:34:56 ******/
CREATE TABLE [dbo].[dim_CalendarMonth](
      [CalendarMonthID] [int] NOT NULL,
      [CalendarMonthNumber] [int] NULL,
      [CalendarMonthName] [varchar](20) NULL,
      [CalendarQuarterID] [int] NULL,
 CONSTRAINT [PK_dim_CalendarMonth] PRIMARY KEY CLUSTERED
(
      [CalendarMonthID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]
GO

/****** Object:  Table [dbo].[dim_CalendarDay]    Script Date: 11/02/2009 12:34:56 ******/
CREATE TABLE [dbo].[dim_CalendarDay](
      [TimeID] [int] NOT NULL,
      [SQLDate] [datetime] NULL,
      [CalendarMonthID] [int] NULL,
      [WeekID] [int] NULL,
 CONSTRAINT [PK_dim_Date] PRIMARY KEY CLUSTERED
(
      [TimeID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
/* DDL End */


/* Populate Begin */

DECLARE  @StartSeedDate DATETIME
DECLARE  @EndSeedDate DATETIME

SET @StartSeedDate = '01 Jan 2007'
SET @EndSeedDate   = '31 Dec 2009'
                  
-- Set defaults if none are supplied
IF @StartSeedDate IS NULL
  SET @StartSeedDate = DATEADD(YYYY,-1,CONVERT(CHAR,GETDATE(),112))

IF @EndSeedDate IS NULL
  SET @EndSeedDate = DATEADD(YYYY,1,CONVERT(CHAR,GETDATE(),112))
                    
CREATE TABLE #TMP_DIMDATE (
  SQLDATE DATETIME NULL)

WHILE @StartSeedDate <= @EndSeedDate
  BEGIN
    INSERT #TMP_DIMDATE
          (SQLDATE)
    VALUES(@StartSeedDate)
    SELECT @StartSeedDate = DATEADD(DD,1,@StartSeedDate)
  END
 
-- Year
INSERT dbo.dim_CalendarYear
SELECT DISTINCT
      DATEPART(YEAR,SQLDATE) AS [CalendarYearID]
      , DATENAME(YEAR,SQLDATE) AS [CalendarYearName]
FROM   #TMP_DIMDATE
-- SELECT * FROM dbo.dim_CalendarYear

-- Quarter
INSERT dbo.dim_CalendarQuarter
SELECT DISTINCT
      DATEPART(YYYY,SQLDATE) * 100 + DATEPART(QUARTER,SQLDATE)     AS [CalendarQuarterID]
,     DATEPART(YYYY,SQLDATE) * 100 + DATEPART(QUARTER,SQLDATE)     AS [CalendarQuarterNumber]
,     CAST(DATEPART(YYYY,SQLDATE) AS char(4)) + ' Q' + CAST(DATEPART(QUARTER,SQLDATE) AS char(1))     AS [CalendarQuarterName]
,     DATEPART(YEAR,SQLDATE) AS [CalendarYearID]
FROM   #TMP_DIMDATE
-- SELECT * FROM dbo.dim_CalendarQuarter

-- Month
INSERT dbo.dim_CalendarMonth
SELECT DISTINCT
      DATEPART(YYYY,SQLDATE) * 100 + DATEPART(MONTH,SQLDATE)     AS [CalendarMonthID]
,     DATEPART(YYYY,SQLDATE) * 100 + DATEPART(MONTH,SQLDATE)     AS [CalendarMonthNumber]
,     DATENAME(YYYY,SQLDATE) + ' ' + DATENAME(MM,SQLDATE)     AS [CalendarMonthName]
,     DATEPART(YYYY,SQLDATE) * 100 + DATEPART(QUARTER,SQLDATE)     AS [CalendarQuarterID]
FROM   #TMP_DIMDATE
-- SELECT * FROM dbo.dim_CalendarMonth

-- Day
INSERT dbo.dim_CalendarDay
SELECT DISTINCT
      DATEPART(YYYY,SQLDATE) * 10000 + DATEPART(MONTH,SQLDATE) * 100 +  DATEPART(DAY,SQLDATE)     AS [TimeID]
,     SQLDATE
,     DATEPART(YYYY,SQLDATE) * 100 + DATEPART(MONTH,SQLDATE)     AS [CalendarMonthID]
,   NULL as [ShopLocalMonthID]
FROM   #TMP_DIMDATE
-- SELECT * FROM dbo.dim_CalendarDay


/* Populate End */


No comments:

Post a Comment

Followers