sql >> データベース >> RDS >> Sqlserver

非常に複雑な SQL クエリの問題

    何が起こっているのかをもう少しわかりやすくするために、データモデルを少し変更しました。

    -- Customers, identified by name.
    -- [CustomerLink] is nullable and holds up to 20 bytes of binary data.
    CREATE TABLE [dbo].[Customer] (
        [CustomerName] VARCHAR(20) NOT NULL,
        [CustomerLink] VARBINARY(20) NULL
    );
    
    -- Identification documents held by customers: each row records one
    -- [ID] value of a given [IDType] for a customer name.
    CREATE TABLE [dbo].[CustomerIdentification] (
        [CustomerName] VARCHAR(20) NOT NULL,
        [ID] VARCHAR(50) NOT NULL,
        [IDType] VARCHAR(16) NOT NULL
    );
    

    さらにテストデータを追加しました。

    -- Test customers; only the name is supplied, so [CustomerLink] starts as NULL.
    INSERT INTO [dbo].[Customer] ([CustomerName])
    VALUES
        ('Fred'),
        ('Bob'),
        ('Vince'),
        ('Tom'),
        ('Alice'),
        ('Matt'),
        ('Dan');
    
    -- Test identification documents, one row per (customer, ID value, ID type).
    -- The column list is explicit so the statement does not depend on the
    -- physical column order of the table.
    INSERT INTO [dbo].[CustomerIdentification]
            ([CustomerName], [ID], [IDType])
    VALUES
            ('Fred',    'A',    'Passport'),
            ('Fred',    'A',    'SIN'),
            ('Fred',    'A',    'Drivers Licence'),
            ('Bob',     'A',    'Passport'),
            ('Bob',     'B',    'Drivers Licence'),
            ('Bob',     'C',    'Credit Card'),
            ('Vince',   'A',    'Passport'),
            ('Vince',   'B',    'SIN'),
            ('Vince',   'C',    'Credit Card'),
            ('Tom',     'A',    'Passport'),
            ('Tom',     'B',    'SIN'),
            ('Tom',     'B',    'Drivers Licence'),
            ('Alice',   'B',    'Drivers Licence'),
            ('Matt',    'X',    'Drivers Licence'),
            ('Dan',     'X',    'Drivers Licence');
    

    お探しのものはこれでしょうか:

    -- Assigns a [CustomerLink] hash to every customer that appears in at least
    -- one matched pair. The hash is taken from the longest '$'-delimited chain
    -- of matched customer names that contains the customer. Customers without
    -- any matched pair are not touched by the UPDATE.
    -- NOTE(review): '$' is the list delimiter, so this assumes no CustomerName
    -- contains '$' -- confirm against real data.
    ;WITH [cteNonMatchingIDs] AS (
        -- Ordered pairs of different customers that hold the same IDType
        -- with different IDs (a conflicting document).
        SELECT  ci3.[CustomerName] AS [CustomerName1],
                ci4.[CustomerName] AS [CustomerName2]
        FROM [dbo].[CustomerIdentification] ci3
        INNER JOIN [dbo].[CustomerIdentification] ci4
            ON ci3.[IDType] = ci4.[IDType]
        WHERE ci3.[CustomerName] <> ci4.[CustomerName]
        AND ci3.[ID] <> ci4.[ID]
    ),
    [cteMatchedPairs] AS (
        -- Pairs (CustomerName1 < CustomerName2) that agree on at least one
        -- IDType/ID and have no conflicting document.
        -- An INNER JOIN is sufficient: the name-ordering predicate can never
        -- be true for an unmatched (NULL) row, and it also implies the two
        -- names differ.
        SELECT DISTINCT
                ci1.[CustomerName] AS [CustomerName1],
                ci2.[CustomerName] AS [CustomerName2]
        FROM [dbo].[CustomerIdentification] ci1
        INNER JOIN [dbo].[CustomerIdentification] ci2
            ON ci1.[IDType] = ci2.[IDType]
            AND ci1.[ID] = ci2.[ID]
            AND ci1.[CustomerName] < ci2.[CustomerName]
        WHERE NOT EXISTS (
            SELECT 1
            FROM [cteNonMatchingIDs] nm
            WHERE nm.[CustomerName1] = ci1.[CustomerName] -- correlated subquery
            AND nm.[CustomerName2] = ci2.[CustomerName]
        )
    ),
    [cteMatchedList] ([CustomerName], [CustomerNameList]) AS (
        -- Anchor: each pair seeds the list '$Name1$Name2' for its first
        -- member and '$Name2' for its second member.
        SELECT  seed.[CustomerName1],
                seed.[CustomerNameList]
        FROM (
            SELECT  [CustomerName1],
                    CONVERT(VARCHAR(1000), '$'
                     + [CustomerName1] + '$'
                     + [CustomerName2]) AS [CustomerNameList]
            FROM [cteMatchedPairs]
            UNION ALL
            SELECT  [CustomerName2],
                    CONVERT(VARCHAR(1000), '$'
                     + [CustomerName2]) AS [CustomerNameList]
            FROM [cteMatchedPairs]
        ) seed
        UNION ALL
        -- Recursive step: extend a list whose LAST name is the first member
        -- of a pair with that pair's second member. The '$' delimiter is part
        -- of the comparison so that a name which is merely a suffix of the
        -- last name (e.g. 'Ann' vs 'JoAnn') does not extend the list.
        SELECT  ml.[CustomerName],
                CONVERT(VARCHAR(1000), ml.[CustomerNameList] + '$'
                 + mp.[CustomerName2])
        FROM [cteMatchedList] ml -- recursive CTE
        INNER JOIN [cteMatchedPairs] mp
            ON RIGHT(ml.[CustomerNameList], LEN(mp.[CustomerName1]) + 1)
             = '$' + mp.[CustomerName1]
    ),
    [cteSubstringLists] AS (
        -- For every list of a customer, find all lists that contain it as a
        -- run of whole names. Both sides get a trailing '$' so a name cannot
        -- match a longer name it is a prefix of, and CHARINDEX is used instead
        -- of LIKE so '%', '_' and '[' in names are not treated as wildcards.
        SELECT  r1.[CustomerName],
                r2.[CustomerNameList]
        FROM [cteMatchedList] r1
        INNER JOIN [cteMatchedList] r2
            ON CHARINDEX(r1.[CustomerNameList] + '$',
                         r2.[CustomerNameList] + '$') > 0
    ),
    [cteCustomerLink] AS (
        -- Keep, per customer, the longest containing list and hash it.
        -- SHA1 yields 20 bytes, which is the size of [Customer].[CustomerLink].
        -- NOTE(review): if several lists tie for the maximum length, more than
        -- one row per customer is produced and the UPDATE below keeps an
        -- arbitrary one of them.
        SELECT DISTINCT
                longest.[CustomerName],
                HASHBYTES('SHA1', candidate.[CustomerNameList]) AS [CustomerLink]
        FROM (
            SELECT  [CustomerName],
                    MAX(LEN([CustomerNameList])) AS [MaxListLen]
            FROM [cteSubstringLists]
            GROUP BY [CustomerName]
        ) longest
        INNER JOIN [cteSubstringLists] candidate
            ON candidate.[CustomerName] = longest.[CustomerName]
            AND LEN(candidate.[CustomerNameList]) = longest.[MaxListLen]
    )
    UPDATE  c
    SET     [CustomerLink] = cl.[CustomerLink]
    FROM [dbo].[Customer] c
    INNER JOIN [cteCustomerLink] cl
        ON cl.[CustomerName] = c.[CustomerName];
    
    
    -- Show every customer with its link value; columns are listed explicitly
    -- and rows are ordered by name so the output is stable between runs.
    SELECT  [CustomerName],
            [CustomerLink]
    FROM [dbo].[Customer]
    ORDER BY [CustomerName];
    



    1. DBFloat列に指数値を挿入しないでください

    2. 1つのフィールドに複数の外部キーを含めることは可能ですか?

    3. MySQL除外行

    4. 日付を取得し、ユーザーのタイムゾーンに従って特定のタイムゾーンに変換します