104
105 # Define headers for splitting on h1 and h2 tags.
106▶ headers_to_split_on = [("h1", "Main Topic"), ("h2", "Sub Topic")]
107
108 splitter = HTMLHeaderTextSplitter(
· · ·
127
128 # 'documents' now contains Document objects reflecting the hierarchy:
129▶ # - Document with metadata={"Main Topic": "Introduction"} and
130 # content="Introduction"
131 # - Document with metadata={"Main Topic": "Introduction"} and
· · ·
131▶ # - Document with metadata={"Main Topic": "Introduction"} and
132 # content="Welcome to the introduction section."
133 # - Document with metadata={"Main Topic": "Introduction",
· · ·
133▶ # - Document with metadata={"Main Topic": "Introduction",
134 # "Sub Topic": "Background"} and content="Background"
135 # - Document with metadata={"Main Topic": "Introduction",
· · ·
135▶ # - Document with metadata={"Main Topic": "Introduction",
136 # "Sub Topic": "Background"} and content="Some background details here."
137 # - Document with metadata={"Main Topic": "Conclusion"} and
+ 9 more matches in this file