Skip to content

Commit 9b3d9e3

Browse files
committed
lab5: performance & c5
1 parent 5a18aa5 commit 9b3d9e3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+2938
-10
lines changed

C1/README.md

-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,3 @@
22

33
- [note](./note/README.md)
44
- [practice](./practice/README.md)
5-

C2/README.md

-2
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,3 @@
33
- [note](./note/README.md)
44
- [practice](./practice/README.md)
55
- [homework](./homework/README.md)
6-
7-
> Operating System: macOS 10.15

C3/README.md

-2
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,3 @@
33
- [note](./note/README.md)
44
- [practice](./practice/README.md)
55
- [homework](./homework/README.md)
6-
7-
> Operating System: macOS 10.15

C4/README.md

-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,3 @@
44
- [practice](./practice/README.md)
55
- [homework](./homework/README.md)
66

7-
> Operating System: macOS 10.15

C4/practice/README.md

+36
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,39 @@
88
- [4.6](./4.6/README.md)
99
- [4.7](./4.7/README.md)
1010
- [4.8](./4.8/README.md)
11+
- [4.9](./4.9/README.md)
12+
- [4.10](./4.10/README.md)
13+
- [4.11](./4.11/README.md)
14+
- [4.12](./4.12/README.md)
15+
- [4.13](./4.13/README.md)
16+
- [4.14](./4.14/README.md)
17+
- [4.15](./4.15/README.md)
18+
- [4.16](./4.16/README.md)
19+
- [4.17](./4.17/README.md)
20+
- [4.18](./4.18/README.md)
21+
- [4.19](./4.19/README.md)
22+
- [4.20](./4.20/README.md)
23+
- [4.21](./4.21/README.md)
24+
- [4.22](./4.22/README.md)
25+
- [4.23](./4.23/README.md)
26+
- [4.24](./4.24/README.md)
27+
- [4.25](./4.25/README.md)
28+
- [4.26](./4.26/README.md)
29+
- [4.27](./4.27/README.md)
30+
- [4.28](./4.28/README.md)
31+
- [4.29](./4.29/README.md)
32+
- [4.30](./4.30/README.md)
33+
- [4.31](./4.31/README.md)
34+
- [4.32](./4.32/README.md)
35+
- [4.33](./4.33/README.md)
36+
- [4.34](./4.34/README.md)
37+
- [4.35](./4.35/README.md)
38+
- [4.36](./4.36/README.md)
39+
- [4.37](./4.37/README.md)
40+
- [4.38](./4.38/README.md)
41+
- [4.39](./4.39/README.md)
42+
- [4.40](./4.40/README.md)
43+
- [4.41](./4.41/README.md)
44+
- [4.42](./4.42/README.md)
45+
- [4.43](./4.43/README.md)
46+
- [4.44](./4.44/README.md)

C5/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Chapter 5: Optimizing Program Performance
2+
3+
- [note](./note/README.md)
4+
- [practice](./practice/README.md)
5+
- [homework](./homework/README.md)

C5/homework/5.13/README.md

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# 5.13
2+
3+
A.
4+
5+
```txt
6+
+----+----+----+----+-----+-----+
7+
|%rbp|%rcx|%rax|%rbx|%xmm1|%xmm0|
8+
+----+----+----+----+-----+-----++----+
9+
+----|----|----|----------|--->| |
10+
| +----|----|----------|--->|load| vmovsd 0(%rbp,%rcx,8),%xmm1
11+
| | | | +-----|----| |
12+
| | | | | | +----+
13+
| +----|----|----------|--->| |
14+
| | | | | | |load|---+
15+
| | +----|----------|--->| | |
16+
| | | | | | +----+ | vmulsd (%rax,%rcx,8),%xmm1,%xmm1
17+
| | | | | | | |<--+
18+
| | | | +-----|--->|mul |
19+
| | | | +-----|----| |
20+
| | | | | | +----+
21+
| | | | +-----|--->| |
22+
| | | | | +--->|add | vaddsd %xmm1,%xmm0,%xmm0
23+
| | | | | +----| |
24+
| | | | | | +----+
25+
| +----|----|----------|--->| |
26+
| | | | | |add | addq $1, %rcx
27+
| +----|----|----------|----| |
28+
| | | | | | +----+
29+
| +----|----|----------|--->| |
30+
| | | | | | |cmp |---+ cmpq %rbx, %rcx
31+
| | | +----|-----|--->| | |
32+
| | | | | | +----+ |
33+
| | | | | | | | |
34+
| | | | | | |jne |<--+ jne .L15
35+
| | | | | | | |
36+
| | | | | | +----+
37+
v v v v v v
38+
+----+----+----+----+-----+-----+
39+
|%rbp|%rcx|%rax|%rbx|%xmm1|%xmm0|
40+
+----+----+----+----+-----+-----+
41+
42+
43+
44+
+----+ +-----+
45+
|%rcx| |%xmm0|
46+
+----+ +-----+
47+
| |
48+
| +----+ | <--------- critical path
49+
+---->|load|------+ |
50+
| +----+ | |
51+
| v v
52+
| +----+ +-+--+ +-+--+
53+
+---->|load|--->|mul |--->|add |
54+
| +----+ +----+ +----+
55+
| |
56+
| |
57+
v |
58+
+----+ |
59+
|add | |
60+
+----+ |
61+
| |
62+
v v
63+
+-+--+ +-----+
64+
|%rcx| |%xmm0|
65+
+----+ +-----+
66+
```
67+
68+
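B. C. D. In all three cases the answer follows from the `add` operation being the only one on the critical path (the loop-carried chain through the accumulator); the `load` and `mul` operations are not on it.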

C5/homework/5.14/README.md

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# 5.14
2+
3+
```cpp
4+
/*
 * Inner product of u and v, unrolled six ways with six independent
 * accumulators. The result is stored through dest.
 *
 * Presumably vec_length() returns the element count and get_vec_start()
 * the first element of the vector -- confirm against the vec helpers.
 * Assumes v holds at least as many elements as u.
 */
void inner4_loop_unrolling(vec_ptr u, vec_ptr v, data_t *dest) {
    long i;
    long length = vec_length(u);
    /* The unrolled body touches index i + 5, so stop while that is still valid. */
    long limit = length - 5;
    data_t *udata = get_vec_start(u);
    data_t *vdata = get_vec_start(v);

    data_t sum1 = (data_t) 0;
    data_t sum2 = (data_t) 0;
    data_t sum3 = (data_t) 0;
    data_t sum4 = (data_t) 0;
    data_t sum5 = (data_t) 0;
    data_t sum6 = (data_t) 0;

    /* Six elements per iteration, one accumulator per lane. */
    for (i = 0; i < limit; i += 6) {
        sum1 = sum1 + udata[i] * vdata[i];
        sum2 = sum2 + udata[i + 1] * vdata[i + 1];
        sum3 = sum3 + udata[i + 2] * vdata[i + 2];
        sum4 = sum4 + udata[i + 3] * vdata[i + 3];
        sum5 = sum5 + udata[i + 4] * vdata[i + 4];
        sum6 = sum6 + udata[i + 5] * vdata[i + 5];
    }

    /* Finish the remaining (at most five) elements one at a time. */
    for (; i < length; i++) {
        sum1 = sum1 + udata[i] * vdata[i];
    }

    *dest = sum1 + sum2 + sum3 + sum4 + sum5 + sum6;
}
33+
```
34+
35+
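A. Because every element requires two loads (`udata[i]` and `vdata[i]`), the load throughput bounds the CPE at 1.0, no matter how the arithmetic is arranged.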
36+
37+
B.

C5/homework/5.14/innner4.c

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
 * Inner product of u and v with 6 x 1 loop unrolling: six elements per
 * iteration folded into a single accumulator. The result is stored
 * through dest.
 *
 * Presumably vec_length() returns the element count and get_vec_start()
 * the first element of the vector -- confirm against the vec helpers.
 * Assumes v holds at least as many elements as u.
 */
void inner4_loop_unrolling(vec_ptr u, vec_ptr v, data_t *dest) {
    long i;
    long length = vec_length(u);
    /* The unrolled body touches index i + 5, so stop while that is still valid. */
    long limit = length - 5;
    data_t *udata = get_vec_start(u);
    data_t *vdata = get_vec_start(v);

    data_t sum = (data_t) 0;

    for (i = 0; i < limit; i += 6) {
        sum = sum + udata[i] * vdata[i]
                  + udata[i + 1] * vdata[i + 1]
                  + udata[i + 2] * vdata[i + 2]
                  + udata[i + 3] * vdata[i + 3]
                  + udata[i + 4] * vdata[i + 4]
                  + udata[i + 5] * vdata[i + 5];
    }

    /* Finish the remaining (at most five) elements one at a time. */
    for (; i < length; i++) {
        sum = sum + udata[i] * vdata[i];
    }

    *dest = sum;
}

C5/homework/5.15/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# 5.15

C5/homework/5.15/innner4.c

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// 6 * 6
2+
void inner4_loop_unrolling(vec_ptr u, vec_ptr v, data_t *dest) {
3+
long i;
4+
long length = vec_length(u);
5+
data_t *udata = get_vec_start(u);
6+
data_t *vdata = get_vec_start(v);
7+
8+
data_t sum1 = (data_t) 0;
9+
data_t sum2 = (data_t) 0;
10+
data_t sum3 = (data_t) 0;
11+
data_t sum4 = (data_t) 0;
12+
data_t sum5 = (data_t) 0;
13+
data_t sum6 = (data_t) 0;
14+
15+
for (i = 0; i < length; i+=6) {
16+
sum1 = sum1 + udata[i] * vdata[i];
17+
sum2 = sum2 + udata[i + 1] * vdata[i + 1];
18+
sum3 = sum3 + udata[i + 2] * vdata[i + 2];
19+
sum4 = sum4 + udata[i + 3] * vdata[i + 3];
20+
sum5 = sum5 + udata[i + 4] * vdata[i + 4];
21+
sum6 = sum6 + udata[i + 5] * vdata[i + 5];
22+
}
23+
24+
for (; i < length; i++) {
25+
sum1 = sum1 + udata[i] * vdata[i];
26+
}
27+
28+
*dest = sum1 + sum2 + sum3 + sum4 + sum5 + sum6;
29+
30+
}

C5/homework/5.16/README.md

Whitespace-only changes.

C5/homework/5.16/innner4.c

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
 * Inner product of u and v with 6 x 1a loop unrolling: six products per
 * iteration are summed first (the parenthesized group) and only then
 * added to the accumulator, so a single addition per iteration depends
 * on the previous value of sum. The result is stored through dest.
 *
 * Presumably vec_length() returns the element count and get_vec_start()
 * the first element of the vector -- confirm against the vec helpers.
 * Assumes v holds at least as many elements as u.
 */
void inner4_loop_unrolling(vec_ptr u, vec_ptr v, data_t *dest) {
    long i;
    long length = vec_length(u);
    /* The unrolled body touches index i + 5, so stop while that is still valid. */
    long limit = length - 5;
    data_t *udata = get_vec_start(u);
    data_t *vdata = get_vec_start(v);

    data_t sum = (data_t) 0;

    for (i = 0; i < limit; i += 6) {
        sum = sum + (udata[i] * vdata[i]
                     + udata[i + 1] * vdata[i + 1]
                     + udata[i + 2] * vdata[i + 2]
                     + udata[i + 3] * vdata[i + 3]
                     + udata[i + 4] * vdata[i + 4]
                     + udata[i + 5] * vdata[i + 5]);
    }

    /* Finish the remaining (at most five) elements one at a time. */
    for (; i < length; i++) {
        sum = sum + udata[i] * vdata[i];
    }

    *dest = sum;
}

C5/homework/5.17/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# 5.17

C5/homework/5.17/memset.c

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
 * Fill the first n bytes at s, writing K = sizeof(unsigned long) bytes
 * per store wherever possible.
 *
 * cs carries the fill pattern for one whole K-byte store; the caller is
 * expected to have replicated the fill byte into every byte of cs.
 * Only the low byte of cs is used for the single-byte stores.
 *
 * Returns s.
 */
void* effective_memset(void *s, unsigned long cs, size_t n) {
    const size_t word = sizeof(unsigned long);
    const unsigned char fill = (unsigned char)cs;
    unsigned char *bytep = s;
    size_t done = 0;

    /* Head: single bytes until the address is a multiple of K (or n runs out). */
    for (; done < n && (size_t)bytep % word != 0; done++) {
        *bytep++ = fill;
    }

    /* Body: whole K-byte stores. */
    size_t remaining = n - done;
    unsigned long *wordp = (unsigned long *)bytep;
    for (size_t words = remaining / word; words > 0; words--) {
        *wordp++ = cs;
    }

    /* Tail: the fewer-than-K bytes that are left. */
    bytep = (unsigned char *)wordp;
    for (size_t left = remaining % word; left > 0; left--) {
        *bytep++ = fill;
    }

    return s;
}

C5/homework/5.18/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# 5.18

C5/homework/5.18/poly.c

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
 * Evaluate a[0] + a[1]*x + a[2]*x^2 + ... + a[degree]*x^degree
 * term by term, carrying a running power of x.
 */
double poly(double a[], double x, long degree) {
    double sum = a[0];
    double power = x;   /* x^k for the term about to be added */
    long k = 1;

    while (k <= degree) {
        sum += a[k] * power;
        power = x * power;
        k++;
    }
    return sum;
}
12+
13+
/*
 * version 6*3a
 *
 * Evaluate a[0] + a[1]*x + ... + a[degree]*x^degree with six terms per
 * iteration spread over three accumulators. Each accumulator takes a
 * pair of adjacent terms, summed before being added to the accumulator.
 */
double poly_6_3a(double a[], double x, long degree) {
    /* Three partial sums; the first one also carries the constant term. */
    double acc0 = a[0];
    double acc1 = 0;
    double acc2 = 0;

    /* Powers of x for the first term of each pair: x^k, x^(k+2), x^(k+4). */
    double pw0 = x;
    double pw1 = x * x * x;
    double pw2 = x * x * x * x * x;

    /* Every power advances by x^6 per unrolled iteration. */
    double stride = x * x * x * x * x * x;

    long k = 1;

    /*
     * The condition leaves anything from k > degree - 6 onwards (up to
     * six terms) to the scalar loop below.
     */
    while (k <= degree - 6) {
        acc0 = acc0 + (a[k]*pw0 + a[k+1]*pw0*x);
        acc1 = acc1 + (a[k+2]*pw1 + a[k+3]*pw1*x);
        acc2 = acc2 + (a[k+4]*pw2 + a[k+5]*pw2*x);

        pw0 *= stride;
        pw1 *= stride;
        pw2 *= stride;

        k += 6;
    }

    /* Remaining terms, one at a time; pw0 is x^k at this point. */
    while (k <= degree) {
        acc0 = acc0 + a[k]*pw0;
        pw0 *= x;
        k++;
    }

    return acc0 + acc1 + acc2;
}
42+
43+
/*
 * Evaluate a[0] + a[1]*x + ... + a[degree]*x^degree with Horner's method:
 * start from the highest coefficient and repeatedly multiply by x and
 * add the next lower coefficient.
 */
double polyh(double a[], double x, long degree) {
    double acc = a[degree];
    long k = degree;

    while (k > 0) {
        k--;
        acc = a[k] + x*acc;
    }
    return acc;
}

C5/homework/5.19/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# 5.19

C5/homework/5.19/presum.c

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
 * version 4*1a
 *
 * Prefix sum with 4-way unrolling and reassociation:
 * writes p[i] = a[0] + a[1] + ... + a[i] for every 0 <= i < n.
 * Does nothing when n <= 0.
 */
void psum_4_1a(float a[], float p[], long n) {
    long i;
    float last_val;
    float tmp, tmp1, tmp2, tmp3;

    /* Nothing to read or write for an empty input. */
    if (n <= 0) {
        return;
    }

    last_val = p[0] = a[0];

    /*
     * Each iteration produces four outputs, so the index advances by four
     * and the loop runs only while i + 3 is still a valid index.
     */
    for (i = 1; i < n - 3; i += 4) {
        tmp = last_val + a[i];
        tmp1 = tmp + a[i+1];
        tmp2 = tmp1 + a[i+2];
        tmp3 = tmp2 + a[i+3];

        p[i] = tmp;
        p[i+1] = tmp1;
        p[i+2] = tmp2;
        p[i+3] = tmp3;

        /*
         * key point: the four-element sum does not depend on last_val, so
         * only one addition per iteration is on the loop-carried chain.
         */
        last_val = last_val + (a[i] + a[i+1] + a[i+2] + a[i+3]);
    }

    /* Remaining (at most three) elements, one at a time. */
    for (; i < n; i++) {
        last_val += a[i];
        p[i] = last_val;
    }
}

C5/homework/README.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Chapter 5 Homework Problems
2+
3+
- [5.13](./5.13/README.md)
4+
- [5.14](./5.14/README.md)
5+
- [5.15](./5.15/README.md)
6+
- [5.16](./5.16/README.md)
7+
- [5.17](./5.17/README.md)
8+
- [5.18](./5.18/README.md)
9+
- [5.19](./5.19/README.md)

0 commit comments

Comments
 (0)