Skip to content

Commit

Permalink
Merge branch 'dev-1.0.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
cutler-scott-newrelic committed Mar 6, 2020
2 parents 4fd6259 + 57c5549 commit e2c349a
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_hogs"
version = "1.0.1"
version = "1.0.2"
authors = ["Scott Cutler <[email protected]>"]
edition = "2018"
description = "This project provides a set of scanners that will use regular expressions to try and detect the presence of sensitive information such as API keys, passwords, and personal information. It includes a set of regular expressions by default, but will also accept a JSON object containing your custom regular expressions."
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ Download and unzip the [latest ZIP](https://github.com/newrelic/rusty-hog/releas
on the releases tab. Then, run each binary with `-h` to see the usage.

```shell script
wget https://github.com/newrelic/rusty-hog/releases/download/v1.0.1/rustyhogs-musl_darwin_1.0.1.zip
unzip rustyhogs-musl_darwin_1.0.1.zip
wget https://github.com/newrelic/rusty-hog/releases/download/v1.0.2/rustyhogs-musl_darwin_1.0.2.zip
unzip rustyhogs-musl_darwin_1.0.2.zip
darwin_releases/choctaw_hog -h
```

Expand Down
5 changes: 3 additions & 2 deletions scripts/gh_org_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
def f(x):
filename = os.path.join(tempdir, str(uuid.uuid4()))
# expects choctaw_hog in your path
s = subprocess.run(["choctaw_hog", "--outputfile", filename, "--regex", "trufflehog_rules.json", x.ssh_url],
s = subprocess.run(["choctaw_hog", "--outputfile", filename, x.ssh_url],
capture_output=True)
return {"repo": x.name, "results": filename}

Expand All @@ -46,13 +46,14 @@ def f(x):
result_list = json.load(f)
for finding in result_list:
writer.writerow([result['repo'],
result['reason'],
finding['reason'],
str(finding['stringsFound']),
finding['path'],
finding['commit'],
finding['commitHash'],
finding['date']])
except:
pass
os.remove(result['results'])

print("Output written to output.csv")
70 changes: 47 additions & 23 deletions scripts/ghe_secret_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@
import uuid
import logging
import sys
import random
import urllib.parse

if len(sys.argv) == 2 and sys.argv[1].startswith("--log="):
loglevel = sys.argv[1][6:]
else:
loglevel = "WARNING"
loglevel = "WARNING"
sample = False
for arg in sys.argv:
if arg.startswith("--sample="):
sample = int(arg[9:])
if arg.startswith("--log="):
loglevel = arg[6:]

numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int):
Expand All @@ -30,16 +35,20 @@
CHOCTAW_HOG_PATH = os.environ["CHOCTAW_HOG_PATH"]

# initialize GitHub object and list of all repos
logging.info("Trying to authenticate to Github...")
g = Github(base_url=f"https://{GHE_DOMAIN}/api/v3", login_or_token=GHE_REPO_TOKEN, per_page=100)
repos = g.get_repos()
if sample:
logging.info(f"sample size set to {sample}, retrieving list of repos...")
repos = random.sample(list(repos), sample)

# use the datetime library to get an object representing 24 hours ago
today = datetime.today()
twentyfourhoursago = today - timedelta(hours=24)

# start the first main set of work: translate our list of repo objects to a dict of { git_url : (since_commit_hash, html_url) }
repo_dict = {}

logging.info("Getting a list of all commits since 24 hours ago for each repo...")
for repo in repos:
commits = []
try:
Expand All @@ -54,8 +63,9 @@
logging.debug("no SSH URL")
continue
logging.info(f"({repo.ssh_url}, {commits[-1].sha}")
repo_dict[repo.ssh_url] = (commits[-1].sha, f"{repo.html_url}/commit/")
repo_dict[repo.ssh_url] = (commits[-1].sha, repo.html_url)

logging.info("Completed Github API requests...")
repo_dict = dict(
filter(lambda x: x[1], repo_dict.items())
) # and filter out key/value pairs with None as a value
Expand All @@ -66,25 +76,24 @@
# git url as the key and the filename containing the results as the value
tempdir = tempfile.gettempdir()


logging.info("Starting choctaw hog scan of all commits over the last 24 hours...")
def scan_repo(x):
filename = os.path.join(tempdir, str(uuid.uuid4()))
s = subprocess.run(
[
CHOCTAW_HOG_PATH,
"--outputfile",
filename,
"--since_commit",
x[1][0],
"--sshkeypath",
SSH_KEY_PATH,
x[0],
],
capture_output=True,
)
cmdline = [
CHOCTAW_HOG_PATH,
"--outputfile",
filename,
"--since_commit",
x[1][0],
"--sshkeypath",
SSH_KEY_PATH,
x[0],
]
logging.info(f"Running choctaw hog: {str(cmdline)}")
s = subprocess.run(cmdline, capture_output=True)
logging.info(f"choctaw hog output: {s.stdout} {s.stderr}")
return {"repo": x[0], "results": filename, "url": x[1][1]}


output = []

# increase this number to the number of cores you have - runs great on a c5n.4xlarge with 14
Expand All @@ -95,27 +104,42 @@ def scan_repo(x):
logging.debug(output)

# the last block of work, iterate through each JSON file from choctaw_hog and put the results in Insights
logging.info("Collecting choctaw hog output into a single python list...")
output_array = []
for result_dict in output:
try:
f = open(result_dict["results"], "r")
except:
# TODO: add better error handling here. the file won't exist if we couldn't
# access the git repo
logging.warning("failed to open " + result_dict["results"])
continue

with f:
result_list = json.load(f)
for finding in result_list:
fileurl = ""
if finding["new_line_num"] != 0:
fileurl = f"{result_dict['url']}/blob/{finding['commitHash']}/{finding['path']}#L{finding['new_line_num']}"
else:
fileurl = f"{result_dict['url']}/blob/{finding['parent_commit_hash']}/{finding['path']}#L{finding['old_line_num']}"
output_array.append(
{
"eventType": "ghe_secret_monitor",
"commitHash": finding["commitHash"],
"reason": finding["reason"],
"path": finding["path"],
"repo": result_dict["repo"],
"url": result_dict["url"] + finding["commitHash"]
"url": f"{result_dict['url']}/commit/{finding['commitHash']}/{finding['path']}",
"fileurl": fileurl,
"old_line_num": finding["old_line_num"],
"new_line_num": finding["new_line_num"],
"parent_commitHash": finding["parent_commit_hash"]
}
)

os.remove(result_dict["results"])

url = "https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
headers = {
"Content-Type": "application/json",
Expand All @@ -125,6 +149,6 @@ def scan_repo(x):
post = gzip.compress(json.dumps(output_array).encode("utf-8"))
logging.info(f"len(output_array) = {len(output_array)}")
logging.debug(output_array)
logging.info("Submitting data to New Relic Insights...")
r = requests.post(url, data=post, headers=headers)
logging.info(f"insights status code: {r.status_code}")

5 changes: 2 additions & 3 deletions scripts/ghe_secret_monitor.service
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ Wants=ghe_secret_monitor.timer

[Service]
Type=oneshot
ExecStart=/usr/bin/git pull origin
ExecStart=/home/ec2-user/anaconda3/bin/python scripts/ghe_secret_monitor.py
WorkingDirectory=/home/ec2-user/rusty-hog
WorkingDirectory=/home/ec2-user/rusty_hog_1.0.2
User=ec2-user
Group=ec2-user
Environment='PATH=/root/anaconda3/bin:/root/anaconda3/condabin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/root/.local/bin:/root/bin:/root/.local/bin:/root/bin'
Expand All @@ -19,4 +18,4 @@ Environment=CHOCTAW_HOG_PATH=musl_releases/choctaw_hog


[Install]
WantedBy=multi-user.target
WantedBy=multi-user.target
34 changes: 28 additions & 6 deletions scripts/jira_secret_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@

issues = []
r = requests.get(url, headers=headers)
result = r.json()
result = None
try:
result = r.json()
except:
print(f"JIRA error: {r.text}")
sys.exit(1)
total = result['total']
issues.extend(result['issues'])
while len(issues) < total:
Expand All @@ -46,6 +51,7 @@
gdoc_re = re.compile(r'https://docs.google.com/[^\s|\]]+', re.IGNORECASE)
links = defaultdict(set)

logging.info("Reading issue descriptions...")
for issue in issues:
description = issue['fields']['description']
if not description:
Expand All @@ -54,6 +60,7 @@
for match in matches:
links[issue['key']].add(match)

logging.info("Retrieving issue comments...")
for issue in issues:
url = f"https://newrelic.atlassian.net/rest/api/2/issue/{issue['key']}/comment"
r = requests.get(url, headers=headers)
Expand All @@ -67,20 +74,32 @@
gdoc_id_re = re.compile(r'https://docs.google.com/\w+/d/([a-zA-Z0-9-_]+)/?.*',re.IGNORECASE)
output = []

logging.info("Running ankamali hog on each Google Drive link found in Jira...")
for x in links.items():
logging.debug(f"x: {str(x)}")
filename = os.path.join(tempdir, str(uuid.uuid4()))
results = []
for gdoc_link in x[1]:
gdocid = gdoc_id_re.match(gdoc_link).group(1)
logging.debug(f"gdoc_link: {gdoc_link}")
logging.debug(f"gdoc_id_re.match(gdoc_link): {str(gdoc_id_re.match(gdoc_link))}")
gdoc_id_match = gdoc_id_re.match(gdoc_link)
if not gdoc_id_match:
continue
gdocid = gdoc_id_match.group(1)
s = subprocess.run(
[
ANKAMALI_HOG_PATH,
"--outputfile",
filename,
gdocid
],
capture_output=True,
capture_output=True
)
logging.debug(f"ankamali hog output: {s.stdout}")
if s.returncode != 0:
logging.warning(f"ankamali hog exited with a non-zero status code: {s.stdout} {s.stderr}")
# TODO: add better error handling here. some will fail because you don't have
# permission to the doc. others will fail because you set up your token wrong.
results.append({"gdoc_link": gdoc_link, "results": filename, "key": x[0]})
output.extend(results)

Expand All @@ -93,7 +112,9 @@
try:
f = open(result_dict["results"], "r")
except:
logging.debug("failed to open " + result_dict["results"])
# TODO: add better error handling here. the file won't exist if we couldn't
# access the file
logging.warning("failed to open " + result_dict["results"])
continue

with f:
Expand All @@ -108,8 +129,9 @@
"reason": finding["reason"]
}
)
os.remove(result_dict["results"])

url = "https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
url = f"https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
headers = {
"Content-Type": "application/json",
"X-Insert-Key": INSIGHTS_INSERT_KEY,
Expand All @@ -119,4 +141,4 @@
logging.info(f"len(output_array) = {len(output_array)}")
logging.debug(output_array)
r = requests.post(url, data=post, headers=headers)
logging.info(f"insights status code: {r.status_code}")
logging.info(f"insights status code: {r.status_code}")
7 changes: 3 additions & 4 deletions scripts/jira_secret_scanner.service
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,16 @@ Wants=jira_secret_scanner.timer

[Service]
Type=oneshot
ExecStart=/usr/bin/git pull origin
ExecStart=/home/ec2-user/anaconda3/bin/python jira_secret_scanner.py
WorkingDirectory=/home/ec2-user/secret_scanner
WorkingDirectory=/home/ec2-user/rusty_hog_1.0.2
User=ec2-user
Group=ec2-user
Environment='PATH=/root/anaconda3/bin:/root/anaconda3/condabin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/root/.local/bin:/root/bin:/root/.local/bin:/root/bin'
Environment=INSIGHTS_INSERT_KEY=<redacted for git>
Environment=INSIGHTS_ACCT_ID=<redacted for git>
Environment=JIRA_TOKEN=<redacted for git>
Environment=ANKAMALI_HOG_PATH=./ankamali_hog
Environment=ANKAMALI_HOG_PATH=musl_releases/ankamali_hog


[Install]
WantedBy=multi-user.target
WantedBy=multi-user.target
2 changes: 1 addition & 1 deletion src/bin/ankamali_hog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use rusty_hogs::{SecretScanner, SecretScannerBuilder};
/// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
fn main() {
let matches = clap_app!(ankamali_hog =>
(version: "1.0.1")
(version: "1.0.2")
(author: "Scott Cutler <[email protected]>")
(about: "Google Drive secret scanner in Rust.")
(@arg REGEX: --regex +takes_value "Sets a custom regex JSON file")
Expand Down
2 changes: 1 addition & 1 deletion src/bin/berkshire_hog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use std::iter::FromIterator;
/// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
fn main() {
let matches = clap_app!(berkshire_hog =>
(version: "1.0.1")
(version: "1.0.2")
(author: "Scott Cutler <[email protected]>")
(about: "S3 secret hunter in Rust. Avoid bandwidth costs, run this within a VPC!")
(@arg REGEX: --regex +takes_value "Sets a custom regex JSON file")
Expand Down
2 changes: 1 addition & 1 deletion src/bin/choctaw_hog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ use rusty_hogs::{SecretScanner, SecretScannerBuilder};
/// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
fn main() {
let matches = clap_app!(choctaw_hog =>
(version: "1.0.1")
(version: "1.0.2")
(author: "Scott Cutler <[email protected]>")
(about: "Git secret scanner in Rust")
(@arg REGEX: -r --regex +takes_value "Sets a custom regex JSON file")
Expand Down
Loading

0 comments on commit e2c349a

Please sign in to comment.