Skip to content

Commit dc4c6fb

Browse files
committed
Add sitemap index and serve sitemap for subprojects
The sitemap index lists all the sitemap locations for a project, including those of its subprojects. The sitemaps of subprojects are also served from the parent domain. Closes #6841
1 parent f4efd14 commit dc4c6fb

File tree

5 files changed

+130
-3
lines changed

5 files changed

+130
-3
lines changed

docs/user/reference/sitemaps.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@ It contains information such as:
1616
* How important this URL is in relation to other URLs in the site.
1717
* What translations are available for a page.
1818

19-
Read the Docs automatically generates a ``sitemap.xml`` for your project,
19+
Read the Docs automatically generates a ``sitemap.xml`` and a
20+
``sitemap_index.xml`` for your project.
2021

2122
By default the sitemap includes:
2223

2324
* Each version of your documentation and when it was last updated, sorted by version number.
2425

26+
By default the sitemap index includes:
27+
28+
* The location of ``sitemap.xml``
29+
* The location of each subproject's ``sitemap.xml``, if the project has subprojects.
30+
2531
This allows search engines to prioritize results based on the version number,
2632
sorted by `semantic versioning`_.
2733

readthedocs/proxito/tests/test_full.py

+45
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,51 @@ def test_sitemap_all_private_versions(self):
15041504
)
15051505
self.assertEqual(response.status_code, 404)
15061506

1507+
def test_sitemap_subproject(self):
    """The parent domain serves the same sitemap as the subproject's own domain."""
    for project in (self.project, self.subproject):
        project.versions.update(active=True)

    # Subproject sitemap requested through the parent project's domain.
    via_parent_url = reverse("sitemap_xml", args=["subproject"])
    via_parent = self.client.get(
        via_parent_url,
        headers={"host": "project.readthedocs.io"},
    )
    # Same sitemap requested directly from the subproject's domain.
    direct = self.client.get(
        reverse("sitemap_xml"),
        headers={"host": "subproject.readthedocs.io"},
    )

    self.assertEqual(via_parent.status_code, 200)
    self.assertEqual(direct.status_code, 200)
    self.assertEqual(via_parent.content, direct.content)
1522+
1523+
def test_sitemap_index(self):
    """The sitemap index lists the project's sitemap and one entry per subproject."""
    self.project.versions.update(active=True)

    index_url = reverse("sitemap_index_xml")
    response = self.client.get(
        index_url,
        headers={"host": "project.readthedocs.io"},
    )

    self.assertEqual(response.status_code, 200)
    self.assertEqual(response["Content-Type"], "application/xml")

    # NOTE(review): the subproject URLs below look like project slugs. If the
    # fixture gives a subproject an alias that differs from its slug, confirm
    # that these URLs are the ones that actually resolve.
    expected = dedent(
        """
        <?xml version="1.0" encoding="UTF-8"?>
        <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

        <sitemap>
        <loc>https://project.readthedocs.io/sitemap.xml</loc>
        </sitemap>

        <sitemap>
        <loc>https://project.readthedocs.io/projects/subproject/sitemap.xml</loc>
        </sitemap>

        <sitemap>
        <loc>https://project.readthedocs.io/projects/subproject-alias/sitemap.xml</loc>
        </sitemap>

        </sitemapindex>
        """
    ).lstrip()
    body = response.content.decode()
    self.assertEqual(body, expected)
1551+
15071552
@mock.patch(
15081553
"readthedocs.proxito.views.mixins.staticfiles_storage",
15091554
new=StaticFileSystemStorageTest(),

readthedocs/proxito/urls.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
ServeError404,
4747
ServePageRedirect,
4848
ServeRobotsTXT,
49+
ServeSitemapIndexXML,
4950
ServeSitemapXML,
5051
ServeStaticFiles,
5152
)
@@ -136,7 +137,15 @@
136137
name="proxito_404_handler",
137138
),
138139
re_path(r"robots\.txt$", ServeRobotsTXT.as_view(), name="robots_txt"),
139-
re_path(r"sitemap\.xml$", ServeSitemapXML.as_view(), name="sitemap_xml"),
140+
re_path(
141+
r"^(?:projects/(?P<subproject_slug>{project_slug})/)?"
142+
r"sitemap\.xml$".format(**pattern_opts),
143+
ServeSitemapXML.as_view(),
144+
name="sitemap_xml",
145+
),
146+
re_path(
147+
r"sitemap_index\.xml$", ServeSitemapIndexXML.as_view(), name="sitemap_index_xml"
148+
),
140149
]
141150

142151
docs_urls = [

readthedocs/proxito/views/serve.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,7 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
10191019
# Extra cache tag to invalidate only this view if needed.
10201020
project_cache_tag = "sitemap.xml"
10211021

1022-
def get(self, request):
1022+
def get(self, request, subproject_slug=None):
10231023
"""
10241024
Generate and serve a ``sitemap.xml`` for a particular ``project``.
10251025
@@ -1078,6 +1078,12 @@ def changefreqs_generator():
10781078
yield from itertools.chain(changefreqs, itertools.repeat('monthly'))
10791079

10801080
project = request.unresolved_domain.project
1081+
1082+
if subproject_slug:
1083+
project = get_object_or_404(
1084+
project.subprojects, alias=subproject_slug
1085+
).child
1086+
10811087
public_versions = Version.internal.public(
10821088
project=project,
10831089
only_active=True,
@@ -1164,6 +1170,59 @@ class ServeSitemapXML(SettingsOverrideObject):
11641170
_default_class = ServeSitemapXMLBase
11651171

11661172

1173+
class ServeSitemapIndexXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):

    """
    Serve ``sitemap_index.xml`` from the domain's root.

    The index points at the project's own ``sitemap.xml`` and at the
    ``sitemap.xml`` of each of its subprojects, all under the project's
    subdomain.
    """

    cache_response = True
    # Same tag value as the one used by the ``sitemap.xml`` view.
    project_cache_tag = "sitemap.xml"

    def get(self, request):
        """
        Generate and serve a ``sitemap_index.xml`` for a ``project``.

        The sitemap index is generated from the project and all sub-projects.
        Subproject entries are built from the relationship's *alias*, because
        the ``sitemap.xml`` view resolves ``/projects/<name>/sitemap.xml`` by
        alias; using the child's slug would advertise URLs that return 404
        whenever the alias and the slug differ.

        :param request: request whose ``unresolved_domain.project`` is the
            project the index is generated for.
        :returns: the rendered ``sitemap_index.xml`` template, served as
            ``application/xml``.
        """

        project = request.unresolved_domain.project

        # Computed once: every entry lives under the same scheme and domain.
        base_url = "{scheme}://{domain}".format(
            scheme="https",
            domain=project.subdomain(),
        )

        locations = ["{base}/sitemap.xml".format(base=base_url)]
        # ``select_related`` avoids one extra query per relationship when the
        # slug fallback below has to read the child project.
        for relationship in project.subprojects.select_related("child"):
            # NOTE(review): the slug fallback only matters if a relationship
            # can be stored without an alias -- confirm against the model.
            alias = relationship.alias or relationship.child.slug
            locations.append(
                "{base}/projects/{subproject}/sitemap.xml".format(
                    base=base_url,
                    subproject=alias,
                )
            )

        context = {
            "locations": locations,
        }
        return render(
            request,
            "sitemap_index.xml",
            context,
            content_type="application/xml",
        )

    def _get_project(self):
        # Method used by the CDNCacheTagsMixin class.
        return self.request.unresolved_domain.project

    def _get_version(self):
        # This view isn't attached to a version.
        return None
1220+
1221+
1222+
class ServeSitemapIndexXML(SettingsOverrideObject):

    """
    Entry point for the ``sitemap_index.xml`` view.

    Defaults to ``ServeSitemapIndexXMLBase``.
    NOTE(review): presumably ``SettingsOverrideObject`` lets a setting swap
    in another implementation -- confirm against its definition.
    """

    _default_class = ServeSitemapIndexXMLBase
1224+
1225+
11671226
class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View):
11681227

11691228
"""
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3+
{% for loc in locations %}
4+
<sitemap>
5+
<loc>{{ loc }}</loc>
6+
</sitemap>
7+
{% endfor %}
8+
</sitemapindex>

0 commit comments

Comments
 (0)