|
10 | 10 | FOUND_FILES_PATH = os.path.join(BASE_DIR, 'found-files')
|
11 | 11 | LOG_FILE_PATH = os.path.join(BASE_DIR, 'console.log')
|
12 | 12 | ENV_FILE_PATH = os.path.join(BASE_DIR, '.env')
|
| 13 | +STATS_FILE_PATH = os.path.join(BASE_DIR, 'STATS.md') |
13 | 14 |
|
14 | 15 |
|
15 | 16 | logging.basicConfig(
|
|
23 | 24 | )
|
24 | 25 |
|
25 | 26 |
|
26 |
def split_array(array: list, chunk_size: int) -> list:
    """Split *array* into consecutive chunks of at most *chunk_size* items.

    Args:
        array: The list to split. It is sliced, never mutated.
        chunk_size: Maximum number of items per chunk; must be positive.

    Returns:
        A list of lists. Every chunk holds exactly ``chunk_size`` items
        except possibly the last one, which holds the remainder. An empty
        input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    # A negative step would make range() yield nothing and silently drop
    # every element; a zero step fails with an unrelated-looking message.
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be a positive integer, got {chunk_size}')

    return [
        array[start:start + chunk_size]
        for start in range(0, len(array), chunk_size)
    ]
28 |
| - |
29 |
| - |
30 |
def copy_for_reviving():
    """Build ``await revive(...)`` statements for not-found sequences and pass them to ``copy``.

    The sequences returned by ``get_not_found_sequences('a', 'a')`` are
    grouped into batches of 64. Each batch becomes one statement of the form
    ``await revive(`seq1\\nseq2...`);`` and the statements are separated by a
    blank line.

    NOTE(review): ``copy`` and ``get_not_found_sequences`` are defined outside
    this block; ``copy`` is presumably a clipboard helper -- confirm.
    """
    not_found = get_not_found_sequences('a', 'a')

    # The separator is the two-character text backslash + "n", not a real
    # newline. It is kept outside the f-string because a backslash (or a
    # reused quote character) inside a replacement field is a SyntaxError
    # on Python versions before 3.12.
    separator = '\\n'
    statements = [
        f'await revive(`{separator.join(chunk)}`);'
        for chunk in split_array(not_found, 64)
    ]
    copy('\n\n'.join(statements))
35 |
| - |
36 |
| - |
def get_all_stats():
    """Log per-letter and overall statistics and write them to ``STATS.md``.

    For every letter A-Z, ``get_statistics`` is queried for a
    ``(total, found, percentage)`` triple. Each result is logged, the totals
    are accumulated, and a Markdown report is written to ``STATS_FILE_PATH``.
    The report holds a two-column table (first half of the alphabet paired
    with the second half: A with N, B with O, ..., M with Z) followed by a
    small overall summary table.

    Returns:
        None. The results are emitted through ``logging`` and the stats file.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    stats = {}
    total_overall, found_overall = 0, 0

    # Step 1: collect and log the statistics for every letter.
    # NOTE(review): get_statistics receives uppercase letters here -- confirm
    # that it is case-insensitive or expects uppercase input.
    for letter in letters:
        total, found, percentage = get_statistics(letter)
        stats[letter] = (total, found, percentage)
        total_overall += total
        found_overall += found
        logging.info(f'Letter {letter}: {found:<7,} / {total:,} sequences found ({percentage:.2f}%)')

    # Guard against division by zero when nothing has been counted at all.
    overall_percentage = (found_overall / total_overall * 100) if total_overall else 0
    logging.info('\n--- Overall Statistics ---')
    logging.info(f'Total: {found_overall:,} / {total_overall:,} sequences found ({overall_percentage:.2f}%)\n\n')

    # Step 2: build the Markdown table.
    stats_lines = [
        "# Stats\n\n",
        "| Letter | Found    | Percentage | Letter | Found    | Percentage |\n",
        "|--------|----------|------------|--------|----------|------------|\n",
    ]

    # Pair the first half of the alphabet with the second half so each row
    # shows two letters side by side.
    half = len(letters) // 2
    for left, right in zip(letters[:half], letters[half:]):
        _, left_found, left_percentage = stats[left]
        _, right_found, right_percentage = stats[right]
        stats_lines.append(
            f"| {left:<6} | {left_found:<8,} | {left_percentage:>9.2f}% "
            f"| {right:<6} | {right_found:<8,} | {right_percentage:>9.2f}% |\n"
        )

    # Overall summary table, separated from the letter table by a blank line.
    stats_lines.append("\n| Total Sequences Found | Percentage |\n")
    stats_lines.append("|-----------------------|------------|\n")
    stats_lines.append(f"| {found_overall:<21,} | {overall_percentage:>9.2f}% |\n")

    # Step 3: write the report. The explicit encoding keeps the output
    # independent of the platform's default locale.
    with open(STATS_FILE_PATH, 'w', encoding='utf-8') as stats_file:
        stats_file.writelines(stats_lines)
| 70 | + |
51 | 71 |
|
def process_everything():
    """Run ``process_single_file`` on every entry found directly in ``FOUND_FILES_PATH``.

    Entries are visited in the order ``os.listdir`` returns them, and each one
    is handed over as a path joined onto ``FOUND_FILES_PATH``.
    """
    entry_names = os.listdir(FOUND_FILES_PATH)
    for entry_name in entry_names:
        process_single_file(os.path.join(FOUND_FILES_PATH, entry_name))
|
57 | 76 |
|
58 | 77 |
|
|
0 commit comments