|
| 1 | +''' |
| 2 | +Copyright (c) 2011-2014 F-Secure |
| 3 | +See LICENSE for details |
| 4 | +''' |
| 5 | +import datetime, logging |
| 6 | +from model_extraction import csv_unicode |
| 7 | + |
| 8 | +LOGGER = logging.getLogger('root.' + __name__) |
| 9 | + |
| 10 | +def _procmon_time_to_timestamp(time_as_string): |
| 11 | + ''' |
| 12 | + Convenient converter from process monitor time string to python time |
| 13 | + ''' |
| 14 | + hour, mins, secs = time_as_string.split(":") |
| 15 | + secs, millis = secs.split(",") |
| 16 | + millis = millis[:6] |
| 17 | + return datetime.time(int(hour), int(mins), int(secs), int(millis)) |
| 18 | + |
| 19 | +def _should_correlate(edge, operation): |
| 20 | + ''' |
| 21 | + Returns True if the given operation for the given edge should be considered |
| 22 | + False otherwise |
| 23 | + ''' |
| 24 | + if not edge is None: |
| 25 | + return (operation == "WriteFile" or |
| 26 | + operation.startswith("TCP") or |
| 27 | + operation == "RegSetValue") |
| 28 | + else: |
| 29 | + return False |
| 30 | + |
| 31 | +def _should_ignore_by_path(path): |
| 32 | + ''' |
| 33 | + Returns True if the given path should be ignored when correlating |
| 34 | + ''' |
| 35 | + if path.startswith("C:\\Users\\testuser\\AppData\\Local\\"): |
| 36 | + return True |
| 37 | + else: |
| 38 | + return False |
| 39 | + |
| 40 | +_IGNORABLE_PROCESSES = ["mobsync.exe", "wmpnetwk.exe", "spoolsv.exe", |
| 41 | + "smss.exe", "lsm.exe", "wmpnscfg.exe", |
| 42 | + "python.exe", "cmd.exe", "wermgr.exe", |
| 43 | + "conhost.exe", "tvnserver.exe", "Explorer.EXE", |
| 44 | + "Procmon.exe", "Procmon64.exe", "svchost.exe", |
| 45 | + "DllHost.exe", "System", "taskhost.exe", |
| 46 | + "services.exe", "lsass.exe", "wmiprvse.exe"] |
| 47 | + |
| 48 | +def _should_ignore_process(process): |
| 49 | + ''' |
| 50 | + Returns True if the given process should be ignored when correlating |
| 51 | + ''' |
| 52 | + return process in _IGNORABLE_PROCESSES |
| 53 | + |
| 54 | +class Correlator(object): |
| 55 | + ''' |
| 56 | + Simple correlation between a scraper timeline and log entries by it's |
| 57 | + time, this correlator is specific to ProcessMonitor csv output. |
| 58 | + TODO: keep track of correlated logs so it can support multiple runs |
| 59 | + ''' |
| 60 | + def __init__(self, world, timeline): |
| 61 | + self.timeline = timeline |
| 62 | + self._world = world |
| 63 | + |
| 64 | + def _dump_timeline(self): |
| 65 | + ''' |
| 66 | + Simple debug output print of the timeline |
| 67 | + ''' |
| 68 | + print "Timeline is:" |
| 69 | + for i in range(len(self.timeline)): |
| 70 | + if self.timeline[i] == "reboot": |
| 71 | + print "reboot" |
| 72 | + else: |
| 73 | + print "\t%s: %s" % (self.timeline[i]['from'].name, |
| 74 | + self.timeline[i]['departure']) |
| 75 | + |
| 76 | + def _add_trace_to_edge(self, edge, trace_index): |
| 77 | + ''' |
| 78 | + Adds a trace, meaning the series of steps used with that index in |
| 79 | + the timeline to pair with the log info |
| 80 | + ''' |
| 81 | + steps = [] |
| 82 | + |
| 83 | + if self.timeline[trace_index - 1] == 'reboot': |
| 84 | + trace_index -= 1 |
| 85 | + |
| 86 | + for i in range(trace_index - 1, -1, -1): |
| 87 | + if self.timeline[i] == 'reboot': |
| 88 | + break |
| 89 | + timeline_edge = self.timeline[i]['from'] |
| 90 | + steps.insert(0, "%s.%s" % (timeline_edge.tail.name, |
| 91 | + timeline_edge.name)) |
| 92 | + |
| 93 | + if not 'traces' in edge.logs['instrumentation']: |
| 94 | + edge.logs['instrumentation']['traces'] = {} |
| 95 | + edge.logs['instrumentation']['traces'][trace_index] = steps |
| 96 | + |
| 97 | + def _add_logged_event_to_edge(self, edge, operation, trace_index, row): |
| 98 | + ''' |
| 99 | + Adds the given row of data to the given edge as instrumentation captured |
| 100 | + data |
| 101 | + ''' |
| 102 | + if self.timeline[trace_index - 1] == 'reboot': |
| 103 | + trace_index -= 1 |
| 104 | + |
| 105 | + if not 'instrumentation' in edge.logs: |
| 106 | + edge.logs['instrumentation'] = {} |
| 107 | + if not operation in edge.logs['instrumentation']: |
| 108 | + edge.logs['instrumentation'][operation] = {} |
| 109 | + if not trace_index in edge.logs['instrumentation'][operation]: |
| 110 | + edge.logs['instrumentation'][operation][trace_index] = [] |
| 111 | + self._add_trace_to_edge(edge, trace_index) |
| 112 | + edge.logs['instrumentation'][operation][trace_index].append(row) |
| 113 | + |
| 114 | + def _correlate_log(self, csv_reader, edge, until, timeline_index): |
| 115 | + ''' |
| 116 | + Adds events from the given log to the corresponding edge, times are |
| 117 | + matched from the crawler timeline against the edge it was executed. |
| 118 | + In this context edge represents what it was executed and until the |
| 119 | + timestamp of the next edge execution |
| 120 | + ''' |
| 121 | + last_message = "" |
| 122 | + #headers |
| 123 | + if csv_reader.next() is None: |
| 124 | + return (None, None) |
| 125 | + |
| 126 | + while True: |
| 127 | + try: |
| 128 | + row = csv_reader.next() |
| 129 | + except StopIteration: |
| 130 | + break |
| 131 | + time, process, _, operation, path, _, _ = row |
| 132 | + if _should_ignore_process(process): |
| 133 | + pass |
| 134 | + elif _should_ignore_by_path(path): |
| 135 | + pass |
| 136 | + else: |
| 137 | + timestamp = _procmon_time_to_timestamp(time) |
| 138 | + if timestamp <= until: |
| 139 | + if _should_correlate(edge, operation): |
| 140 | + message = "%s %s %s" % (process, operation, path) |
| 141 | + self._add_logged_event_to_edge(edge, |
| 142 | + operation, |
| 143 | + timeline_index, |
| 144 | + row) |
| 145 | + if message != last_message: |
| 146 | + LOGGER.debug("\t%s: %s", time, message) |
| 147 | + last_message = message |
| 148 | + else: |
| 149 | + last_message = "" |
| 150 | + while True: |
| 151 | + timeline_index += 1 |
| 152 | + if timeline_index == len(self.timeline): |
| 153 | + return (None, None) |
| 154 | + this_time = self.timeline[timeline_index] |
| 155 | + if this_time == "reboot": |
| 156 | + pass |
| 157 | + elif this_time['departure'].time() > timestamp: |
| 158 | + until = this_time['departure'].time() |
| 159 | + if (timeline_index > 2 and |
| 160 | + self.timeline[timeline_index - 1] == "reboot"): |
| 161 | + edge = self.timeline[timeline_index - 2]['from'] |
| 162 | + LOGGER.debug("%s.%s", edge.tail.name, edge.name) |
| 163 | + LOGGER.debug("Reset") |
| 164 | + else: |
| 165 | + edge = self.timeline[timeline_index - 1]['from'] |
| 166 | + LOGGER.debug("%s.%s", edge.tail.name, edge.name) |
| 167 | + break |
| 168 | + else: |
| 169 | + if self.timeline[timeline_index - 1] != "reboot": |
| 170 | + edge = self.timeline[timeline_index - 1]['from'] |
| 171 | + LOGGER.debug("%s.%s", edge.tail.name, edge.name) |
| 172 | + return (edge, timeline_index) |
| 173 | + |
| 174 | + def correlate_events(self): |
| 175 | + ''' |
| 176 | + Matches log events to edge executions |
| 177 | + ''' |
| 178 | + self._dump_timeline() |
| 179 | + |
| 180 | + timeline_index = 0 |
| 181 | + until = self.timeline[0]['departure'].time() |
| 182 | + edge = None |
| 183 | + LOGGER.info("Will try to correlate events from log with the timeline") |
| 184 | + |
| 185 | + for log in self._world.event_logs: |
| 186 | + with open(log, 'r') as log_file: |
| 187 | + csv_reader = csv_unicode.UnicodeReader(log_file, |
| 188 | + encoding="utf-8-sig") |
| 189 | + (edge, timeline_index) = self._correlate_log(csv_reader, |
| 190 | + edge, |
| 191 | + until, |
| 192 | + timeline_index) |
0 commit comments