Augment build summary view with known infrastructure error info (#144)

This change adds a "Known Infrastructure Errors" section to any build summary page that has errors. The new section contains a bulleted list of failed tasks that we have found to have infrastructure issues. Each such task has a sublist of the infrastructure issues we found. We find infrastructure issues by running regex patterns against each failed task's associated log. The regex patterns come from a json file at a specific known path in the Tools repo.
alejandro5042 · Mar 15, 2021 · 5c0fb91 · 5c0fb91
1 parent d2c1059
commit 5c0fb91
Show file tree

Hide file tree

Showing 2 changed files with 174 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -143,6 +143,10 @@ This button will be added to binary files with these extensions only:
 >
 > This is a preview feature. Please contact Humberto Garza for more details and instructions.
 
+### Augmented build results view
+
+At NI, the build results view will have an additional "Known Infrastructure Errors" section that lists known infrastructure errors under the build step that they occurred in. Additionally, in the Errors section, each error item from a build step that had infrastructure errors will be decorated with a warning indicator. All this is intended to make it easier to tell which errors are actual developer errors that need to be fixed.
+
 ## Documentation
 
 - [Support and troubleshooting](SUPPORT.md)

diff --git a/src/azdo-pr-dashboard.user.js b/src/azdo-pr-dashboard.user.js
@@ -1,7 +1,7 @@
 // ==UserScript==
 
 // @name         AzDO Pull Request Improvements
-// @version      2.53.1
+// @version      2.54.0
 // @author       Alejandro Barreto (National Instruments)
 // @description  Adds sorting and categorization to the PR dashboard. Also adds minor improvements to the PR diff experience, such as a base update selector and per-file checkboxes.
 // @license      MIT
@@ -72,6 +72,7 @@
     if (atNI) {
       watchForDiffHeaders();
       watchFilesTree();
+      watchForKnownBuildErrors(pageData);
     }
 
     // Handle any existing elements, flushing it to execute immediately.
@@ -1083,6 +1084,174 @@
     });
   }
 
+  function watchForKnownBuildErrors(pageData) {
+    addStyleOnce('known-build-errors-css', /* css */ `
+      .infra-errors-card h3 {
+        margin-top: 0;
+        display: inline-block;
+      }
+      .loading-indicator {
+        margin-left: 3ch;
+      }
+      .task-list {
+        margin-top: 0;
+      }
+      .infra-errors-card ul {
+        margin-bottom: 0;
+        margin-left: 4ch;
+      }
+      .infra-errors-card li {
+        margin-bottom: 0.5em;
+        list-style-type: disc;
+      }
+      .infra-errors-card li li {
+        margin-bottom: 0;
+        list-style-type: disc;
+        opacity: 0.7;
+      }
+      .infra-errors-card li span {
+        margin-bottom: 0.5em;
+      }`);
+    eus.onUrl(/\/_build\/results\?buildId=\d+&view=results/gi, (session, urlMatch) => {
+      session.onEveryNew(document, '.run-details-tab-content', async tabContent => {
+        const runDetails = pageData['ms.vss-build-web.run-details-data-provider'];
+        const projectId = pageData['ms.vss-tfs-web.page-data'].project.id;
+        const buildId = runDetails.id;
+        const pipelineName = runDetails.pipeline.name;
+
+        const actualBuildId = parseInt(urlMatch[0].match(/\d+/)[0], 10);
+        if (buildId !== actualBuildId) {
+          // eslint-disable-next-line no-restricted-globals
+          location.reload();
+        }
+
+        if (!runDetails.issues) {
+          return; // do not even add an empty section
+        }
+
+        let queryResponse;
+        try {
+          queryResponse = await fetch(`${azdoApiBaseUrl}/DevCentral/_apis/git/repositories/tools/items?path=/report/build_failure_analysis/pipeline-results/known-issues.json&api-version=6.0`);
+        } catch (err) {
+          console.warn('Could not fetch known issues file from AzDO');
+          return;
+        }
+        const knownIssues = await queryResponse.json();
+        if (!knownIssues.version.match(/^1(\.\d+)?$/)) {
+          console.warn(`Version ${knownIssues.version} of known-issues.json is not one I know what to do with`);
+          return;
+        }
+
+        if (!(new RegExp(knownIssues.pipeline_match).test(pipelineName))) {
+          return; // do not even add an empty section
+        }
+
+        const flexColumn = tabContent.children[0];
+        const summaryCard = flexColumn.children[1];
+        const newCard = $('<div class="infra-errors-card margin-top-16 depth-8 bolt-card bolt-card-white"><div>')[0];
+        const newCardContent = $('<div class="bolt-card-content bolt-default-horizontal-spacing"><div>');
+        newCardContent.appendTo(newCard);
+        summaryCard.insertAdjacentElement('afterend', newCard);
+        $('<h3>Known Infrastructure Errors</h3><span class="loading-indicator">Loading...</span>').appendTo(newCardContent);
+
+        // Fetch build timeline (which contains records with log urls)
+        queryResponse = await fetch(`${azdoApiBaseUrl}/${projectId}/_apis/build/builds/${buildId}/timeline?api-version=6.0`);
+        const timeline = await queryResponse.json();
+
+        // Fetch build logs, which give us line counts
+        queryResponse = await fetch(`${azdoApiBaseUrl}/${projectId}/_apis/build/builds/${buildId}/logs?api-version=6.0`);
+        const logsJson = (await queryResponse.json()).value;
+
+        const infraErrorsList = $('<ul class="task-list"></ul>');
+        infraErrorsList.appendTo(newCardContent);
+
+        const tasksWithInfraErrors = [];
+        let numTasksAdded = 0;
+
+        // For each task with issues
+        for (let i = 0; i < runDetails.issues.length; i += 1) {
+          let infraErrorCount = 0;
+          const taskWithIssues = runDetails.issues[i];
+          const componentListItem = $(`<li>${taskWithIssues.taskName}</li>`);
+          const componentSublist = $('<ul></ul>');
+          componentSublist.appendTo(componentListItem);
+
+          // Find the timeline record for the task, then get the log url
+          for (let j = 0; j < timeline.records.length; j += 1) {
+            if (timeline.records[j].task != null && timeline.records[j].id === taskWithIssues.taskId) {
+              const logUrl = timeline.records[j].log.url;
+              const logId = timeline.records[j].log.id;
+              let logLines = 0;
+              for (let k = 0; k < logsJson.length; k += 1) {
+                if (logsJson[k].id === logId) {
+                  logLines = logsJson[k].lineCount;
+                  break;
+                }
+              }
+
+              if (logLines > 100000) {
+                const content = '<li>⚠️<i>Warning: log file too large to parse</i></li>';
+                $(content).appendTo(componentSublist);
+                infraErrorCount += 1;
+                break;
+              }
+
+              // Fetch the log
+              // eslint-disable-next-line no-await-in-loop
+              queryResponse = await fetch(logUrl);
+              // eslint-disable-next-line no-await-in-loop
+              const log = await queryResponse.text();
+
+              // Test all patterns against log
+              const knownBuildErrors = knownIssues.log_patterns;
+              for (let k = 0; k < knownBuildErrors.length; k += 1) {
+                if (knownBuildErrors[k].category === 'Infrastructure' && new RegExp(knownBuildErrors[k].pipeline_match).test(pipelineName)) {
+                  let matchString = knownBuildErrors[k].match;
+                  if (knownBuildErrors[k].match_flag === 'dotmatchall') {
+                    matchString = matchString.replace('.', '[\\s\\S]');
+                  }
+                  const matches = log.match(new RegExp(matchString, 'g')) || [];
+                  if (matches.length) {
+                    let content = `${knownBuildErrors[k].cause} (x${matches.length})`;
+                    if (knownBuildErrors[k].public_comment) {
+                      content = `${content}<br>${knownBuildErrors[k].public_comment}`;
+                    }
+                    $(`<li>${content}</li>`).appendTo(componentSublist);
+                    infraErrorCount += 1;
+                    tasksWithInfraErrors.push(taskWithIssues.taskName);
+                  }
+                }
+              }
+              break;
+            }
+          }
+
+          if (infraErrorCount) {
+            componentListItem.appendTo(infraErrorsList);
+            numTasksAdded += 1;
+          }
+        }
+
+        if (numTasksAdded === 0) {
+          $('<p>None</p>').appendTo(newCardContent);
+        }
+
+        if (knownIssues.more_info_html) {
+          $(knownIssues.more_info_html).appendTo(newCardContent);
+        }
+
+        session.onEveryNew(document, '.issues-card-content .secondary-text', secondaryText => {
+          const taskName = secondaryText.textContent.split(' • ')[1];
+          if (tasksWithInfraErrors.includes(taskName)) {
+            $('<span> ⚠️POSSIBLE INFRA ERROR</span>').appendTo(secondaryText);
+          }
+        });
+
+        newCardContent.find('.loading-indicator').remove();
+      });
+    });
+  }
+
   function watchForNewDiffs(isDarkTheme) {
     if (isDarkTheme) {
       addStyleOnce('highlight', `