Investigate XML documents


If you ever encounter a situation where you need to parse a XML document and store element values in a table and all of a sudden you get the dreaded error “Truncation error”, this function will help you pinpoint the problem.
Send the XML document as an argument to the function and you will get all populated element names together with a count and a maximum length.

CREATE OR ALTER FUNCTION dbo.fn_xml_element_maxlength
(
        @content XML
)
RETURNS TABLE
AS
RETURN  WITH cte_content(element_name, namespace_name, element_value)
        AS (
                SELECT  s.n.value(N'(local-name(.))[1]', N'NVARCHAR(MAX)') AS element_name,
                        s.n.value(N'(namespace-uri(.))[1]', N'NVARCHAR(MAX)') AS namespace_name,
                        s.n.value(N'(text())[1]', N'NVARCHAR(MAX)') AS element_value
                FROM    @content.nodes(N'(//*)') AS s(n)
        )
        SELECT          TOP(2147483647)
                        cte.element_name,
                        cte.namespace_name,
                        COUNT(*) AS element_count,
                        MAX(LEN(cte.element_value)) AS element_maxlength
        FROM            cte_content AS cte
        WHERE           cte.element_value IS NOT NULL
        GROUP BY        cte.element_name,
                        cte.namespace_name
        ORDER BY        cte.element_name,
                        cte.namespace_name;

You can also do the same with attributes

CREATE OR ALTER FUNCTION dbo.fn_xml_attribute_maxlength
(
        @content XML
)
RETURNS TABLE
AS
RETURN  WITH cte_content(element_name, namespace_name, attribute_name, attribute_value)
        AS (
                SELECT  s.n.value(N'(local-name(..))[1]', N'NVARCHAR(MAX)') AS element_name,
                        s.n.value(N'(namespace-uri(..))[1]', N'NVARCHAR(MAX)') AS namespace_name,
                        s.n.value(N'(local-name(.))[1]', N'NVARCHAR(MAX)') AS attribute_name,
                        s.n.value(N'(.)[1]', N'NVARCHAR(MAX)') AS attribute_value
                FROM    @content.nodes(N'(//@*)') AS s(n)
        )
        SELECT          TOP(2147483647)
                        cte.element_name,
                        cte.namespace_name,
                        cte.attribute_name,
                        COUNT(*) AS attribute_count,
                        MAX(LEN(cte.attribute_value)) AS attribute_maxlength
        FROM            cte_content AS cte
        WHERE           cte.attribute_value IS NOT NULL
        GROUP BY        cte.element_name,
                        cte.namespace_name,
                        cte.attribute_name
        ORDER BY        cte.element_name,
                        cte.namespace_name,
                        cte.attribute_name;

You can also have them both combined, as this

WITH cte_element(element_name, namespace_name, element_value)
AS (
        SELECT  s.n.value(N'(local-name(.))[1]', N'NVARCHAR(MAX)') AS element_name,
                s.n.value(N'(namespace-uri(.))[1]', N'NVARCHAR(MAX)') AS namespace_name,
                s.n.value(N'(text())[1]', N'NVARCHAR(MAX)') AS element_value
        FROM    @content.nodes(N'(//*)') AS s(n)
), cte_attribute(element_name, namespace_name, attribute_name, attribute_value)
AS (
        SELECT  s.n.value(N'(local-name(..))[1]', N'NVARCHAR(MAX)') AS element_name,
                s.n.value(N'(namespace-uri(..))[1]', N'NVARCHAR(MAX)') AS namespace_name,
                s.n.value(N'(local-name(.))[1]', N'NVARCHAR(MAX)') AS attribute_name,
                s.n.value(N'(.)[1]', N'NVARCHAR(MAX)') AS attribute_value
        FROM    @content.nodes(N'(//@*)') AS s(n)
)
SELECT          TOP(2147483647)
                src.element_name,
                src.namespace_name,
                src.attribute_name,
                src.item_count,
                src.item_maxlength
FROM            (
                        SELECT          cte.element_name,
                                        cte.namespace_name,
                                        CAST(NULL AS NVARCHAR(MAX)) AS attribute_name,
                                        COUNT(*) AS item_count,
                                        MAX(LEN(cte.element_value)) AS item_maxlength
                        FROM            cte_element AS cte
                        WHERE           cte.element_value IS NOT NULL
                        GROUP BY        cte.element_name,
                                        cte.namespace_name

                        UNION ALL


                        SELECT          cte.element_name,
                                        cte.namespace_name,
                                        cte.attribute_name,
                                        COUNT(*) AS item_count,
                                        MAX(LEN(cte.attribute_value)) AS item_maxlength
                        FROM            cte_attribute AS cte
                        WHERE           cte.attribute_value IS NOT NULL
                        GROUP BY        cte.element_name,
                                        cte.namespace_name,
                                        cte.attribute_name
                ) AS src
ORDER BY        src.element_name,
                src.namespace_name,
                src.attribute_name;