Compare commits
1660 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b5f76f2cea | ||
|
|
980dcc4978 | ||
|
|
83a295c76d | ||
|
|
fe47e05c16 | ||
|
|
4ac0e7d955 | ||
|
|
0f13a02062 | ||
|
|
944625b94b | ||
|
|
960c7fb1b9 | ||
|
|
7cf3b0b004 | ||
|
|
411ca81786 | ||
|
|
818d20d273 | ||
|
|
9ba7381030 | ||
|
|
4d4b6c7ac9 | ||
|
|
48062573b8 | ||
|
|
35940f6f3d | ||
|
|
1536d6dfa9 | ||
|
|
c53a1f1dac | ||
|
|
ddaa510553 | ||
|
|
1ccf853b04 | ||
|
|
8243604943 | ||
|
|
8f720b55ff | ||
|
|
7e0033743d | ||
|
|
5239b706c3 | ||
|
|
89786f6e26 | ||
|
|
25c751c45a | ||
|
|
52039e80f2 | ||
|
|
046df82ccc | ||
|
|
28af5230d3 | ||
|
|
a3d8b5d225 | ||
|
|
518ce1e946 | ||
|
|
28aae73f74 | ||
|
|
caa1b1a792 | ||
|
|
f828fed015 | ||
|
|
be0e3b9cc4 | ||
|
|
5243bfe5a0 | ||
|
|
95067dd88e | ||
|
|
c7707d1455 | ||
|
|
6e3f19d0c9 | ||
|
|
ad6c7c8986 | ||
|
|
91e262470e | ||
|
|
678548afd7 | ||
|
|
833653ce40 | ||
|
|
5e559322a0 | ||
|
|
aa72cb6ab9 | ||
|
|
16eef3f966 | ||
|
|
321c079205 | ||
|
|
fa7d515165 | ||
|
|
27bf6c17ef | ||
|
|
b6feb99e98 | ||
|
|
16ad6ee2ac | ||
|
|
6d5717e48b | ||
|
|
b6d54281a9 | ||
|
|
9258e2ced0 | ||
|
|
c0c9832713 | ||
|
|
5b4fa73e23 | ||
|
|
35f939165d | ||
|
|
98fe63b5eb | ||
|
|
8e9a701277 | ||
|
|
ce1c744de9 | ||
|
|
f7574c5adc | ||
|
|
195826f991 | ||
|
|
649994706b | ||
|
|
d48b3187b1 | ||
|
|
c16200ac02 | ||
|
|
90ed18222a | ||
|
|
b1b7be0a46 | ||
|
|
1f4bfb68a0 | ||
|
|
6db581ff4e | ||
|
|
9455c92712 | ||
|
|
ea9475dfdf | ||
|
|
0aa77c9176 | ||
|
|
8cfc614047 | ||
|
|
46163a1989 | ||
|
|
7aa8e408df | ||
|
|
5974d00ccf | ||
|
|
34fa99c7ee | ||
|
|
b9a810827d | ||
|
|
5559cb0fde | ||
|
|
6d98740b39 | ||
|
|
e9255e5a0b | ||
|
|
6feb017cbe | ||
|
|
b9b39f1957 | ||
|
|
5dd7c7e402 | ||
|
|
7a4ecc5417 | ||
|
|
85f54499fe | ||
|
|
900fbe5df8 | ||
|
|
339ed7cc4d | ||
|
|
06ac6f4f24 | ||
|
|
3be0a3f628 | ||
|
|
1650d371e6 | ||
|
|
d6aa814de5 | ||
|
|
d151aff237 | ||
|
|
b418c98e63 | ||
|
|
aa8b60a824 | ||
|
|
87e7cba289 | ||
|
|
54651f9f6d | ||
|
|
d06617b2a5 | ||
|
|
4e1ed15237 | ||
|
|
ab85fba7e0 | ||
|
|
903b7badc0 | ||
|
|
e14bb56db1 | ||
|
|
b826c14e4d | ||
|
|
3dd80c5288 | ||
|
|
83bc1466cf | ||
|
|
550e05d149 | ||
|
|
e18e9411f3 | ||
|
|
d002841fb8 | ||
|
|
983aba7a0b | ||
|
|
0a0d04fff6 | ||
|
|
b68423fc1a | ||
|
|
71440cc6ee | ||
|
|
251e8f1fe2 | ||
|
|
f07eebebf7 | ||
|
|
fee89412ed | ||
|
|
73981d4ef3 | ||
|
|
c4cf690e39 | ||
|
|
05c60259b6 | ||
|
|
22e86af4ab | ||
|
|
a5c16a39ba | ||
|
|
f589fba274 | ||
|
|
2acc1d9670 | ||
|
|
68e452802b | ||
|
|
85e8a6a81c | ||
|
|
67e86d2b27 | ||
|
|
bd51add4f1 | ||
|
|
321fa06a3d | ||
|
|
79fc917483 | ||
|
|
7fe1560ca5 | ||
|
|
1e526cfc09 | ||
|
|
9449f3ef89 | ||
|
|
a23f58cc8f | ||
|
|
ed01a3e4e0 | ||
|
|
21589a238b | ||
|
|
2a61c0a45f | ||
|
|
826affb0b5 | ||
|
|
2f94b44a6c | ||
|
|
9b1573d803 | ||
|
|
02aae23a98 | ||
|
|
498781b5bf | ||
|
|
734b0c25d9 | ||
|
|
7d34350bf3 | ||
|
|
b33445c073 | ||
|
|
b4021d7d79 | ||
|
|
4da8385300 | ||
|
|
22651eccc5 | ||
|
|
6d0d12ce78 | ||
|
|
6836014caa | ||
|
|
39197b9ff8 | ||
|
|
733b9c4048 | ||
|
|
90e01a4888 | ||
|
|
c75b940816 | ||
|
|
1f2990f709 | ||
|
|
7bc8c6283a | ||
|
|
9d25f7fb71 | ||
|
|
652582b119 | ||
|
|
dbcc908dee | ||
|
|
19246a027f | ||
|
|
efc9c7c58f | ||
|
|
65e00eb2d4 | ||
|
|
2bce9319d0 | ||
|
|
1215eb4de5 | ||
|
|
8091207d26 | ||
|
|
9633617810 | ||
|
|
a12bf2d753 | ||
|
|
2682ae1af0 | ||
|
|
a1ce31a226 | ||
|
|
be2ffb2023 | ||
|
|
b696f1b466 | ||
|
|
6866d8f429 | ||
|
|
2be3f1766f | ||
|
|
d9cbc2d030 | ||
|
|
d33fc26450 | ||
|
|
1cf6b630d2 | ||
|
|
0d2914bbe8 | ||
|
|
f367b16dc0 | ||
|
|
7f90a0a84e | ||
|
|
e406ce5aab | ||
|
|
5aff4b0f50 | ||
|
|
ab46a7a3a7 | ||
|
|
58428e7ede | ||
|
|
c1bf853310 | ||
|
|
2f5f2e5b1d | ||
|
|
ff5daf9df7 | ||
|
|
f37a324a9e | ||
|
|
29fe5f211f | ||
|
|
3e3f8aaabf | ||
|
|
25ee7f945b | ||
|
|
d8728be689 | ||
|
|
fd5ccc16d6 | ||
|
|
7e6a41e647 | ||
|
|
062bb3044f | ||
|
|
4252cac654 | ||
|
|
18713de70c | ||
|
|
b76726c597 | ||
|
|
14a15bf152 | ||
|
|
3e3aa80fa7 | ||
|
|
b7af9a0860 | ||
|
|
ab468d8c57 | ||
|
|
1ab1050c46 | ||
|
|
03f46d7228 | ||
|
|
c0b73c248f | ||
|
|
a7d2ab4d4f | ||
|
|
e91950f006 | ||
|
|
5553761c02 | ||
|
|
309a151610 | ||
|
|
88685440b6 | ||
|
|
61e3acde06 | ||
|
|
e3dfa96055 | ||
|
|
06ca4e2d16 | ||
|
|
14b180cd16 | ||
|
|
81d5a8db5e | ||
|
|
7afdef3cdb | ||
|
|
47ef56b995 | ||
|
|
02e7893c75 | ||
|
|
9d01fa86ab | ||
|
|
986a2a79da | ||
|
|
5d748f3568 | ||
|
|
e4fbf60668 | ||
|
|
346e5a426f | ||
|
|
6fe3736a23 | ||
|
|
402bfd3ba4 | ||
|
|
aa2757526a | ||
|
|
7b00812fa7 | ||
|
|
6deffca23f | ||
|
|
2bef3629b7 | ||
|
|
175ec3e3d8 | ||
|
|
416259fdb7 | ||
|
|
29c249b33b | ||
|
|
6f47d837a4 | ||
|
|
0704ab8f79 | ||
|
|
cc63b6492f | ||
|
|
08c70cd6fe | ||
|
|
08172556fc | ||
|
|
9ce46fdf2d | ||
|
|
d815b79db0 | ||
|
|
b8df7a1302 | ||
|
|
d614dbeda3 | ||
|
|
47683d3b90 | ||
|
|
cbfb19816b | ||
|
|
bd5d965023 | ||
|
|
1da51918b6 | ||
|
|
4c397ebe1e | ||
|
|
3ae84647c4 | ||
|
|
a758de2f03 | ||
|
|
6b790d778d | ||
|
|
420bdd5854 | ||
|
|
29ac0169aa | ||
|
|
333e10c724 | ||
|
|
f2d6c79742 | ||
|
|
953df69b77 | ||
|
|
125658a3eb | ||
|
|
863f37e1bc | ||
|
|
a7a36c08cb | ||
|
|
0400967909 | ||
|
|
7b98d50e27 | ||
|
|
b8f9b24223 | ||
|
|
be34e5076c | ||
|
|
a72fde03f4 | ||
|
|
39debea990 | ||
|
|
1f6d18efcd | ||
|
|
0f9597aee8 | ||
|
|
d04de344fe | ||
|
|
254fc9fc7a | ||
|
|
38cf1b8a70 | ||
|
|
276e40ab2a | ||
|
|
1fd1d3bd6b | ||
|
|
0977952bde | ||
|
|
04bc3e47c5 | ||
|
|
587fd3a0bd | ||
|
|
239e77db68 | ||
|
|
a75781beaf | ||
|
|
c768068ee7 | ||
|
|
4d505f507b | ||
|
|
199dc46e7d | ||
|
|
71ca0683d6 | ||
|
|
660f2cec7f | ||
|
|
9bcf7a9214 | ||
|
|
e48b5611c5 | ||
|
|
a3c51f0e7e | ||
|
|
21f4981f38 | ||
|
|
1b6bc1b69a | ||
|
|
c91c6be763 | ||
|
|
f6d320ebdc | ||
|
|
dc91affdc5 | ||
|
|
5e5bf928a5 | ||
|
|
6a72560989 | ||
|
|
da5e58682f | ||
|
|
1013ec8db7 | ||
|
|
384e2e3fa1 | ||
|
|
f718761905 | ||
|
|
a15964b2dc | ||
|
|
6fe90e136f | ||
|
|
08cd115db6 | ||
|
|
31aec8bea8 | ||
|
|
925bf7e638 | ||
|
|
1975c1897d | ||
|
|
605050f8e9 | ||
|
|
435fb4bb4f | ||
|
|
1bfd59c595 | ||
|
|
4179e85029 | ||
|
|
4881f7aa54 | ||
|
|
7eb179b701 | ||
|
|
763f5aafdd | ||
|
|
afbd6b8786 | ||
|
|
78028deec6 | ||
|
|
64bcf2893f | ||
|
|
d0b81c7c0b | ||
|
|
0da6342a86 | ||
|
|
1fff99ebb0 | ||
|
|
c7d94a66b0 | ||
|
|
e9788cc04c | ||
|
|
97a5345f41 | ||
|
|
37ea159646 | ||
|
|
6bee23b056 | ||
|
|
c815259481 | ||
|
|
4bbeb51e34 | ||
|
|
530e464347 | ||
|
|
cf8620b956 | ||
|
|
493d433330 | ||
|
|
b0d67064b5 | ||
|
|
0fff0ac754 | ||
|
|
cfb11ff8c1 | ||
|
|
9fa80c2cc8 | ||
|
|
5c12c575b2 | ||
|
|
7f2bc0d177 | ||
|
|
b353eb753b | ||
|
|
bdf2729845 | ||
|
|
b03da81afa | ||
|
|
29886435b4 | ||
|
|
d8466c5839 | ||
|
|
f7727f3aa4 | ||
|
|
5ccd62ced8 | ||
|
|
6d74f4e8ff | ||
|
|
561664187a | ||
|
|
d1e17de7c2 | ||
|
|
a110b42011 | ||
|
|
25c7245566 | ||
|
|
5610bd0f8b | ||
|
|
2b5e45cd9b | ||
|
|
b4d6833231 | ||
|
|
11e1671048 | ||
|
|
a8c752e2ab | ||
|
|
b0d71b648f | ||
|
|
c507507233 | ||
|
|
47dfddedfd | ||
|
|
7a6141389c | ||
|
|
dd05c8f524 | ||
|
|
71359c46c1 | ||
|
|
634b0933d1 | ||
|
|
9c1dd6d498 | ||
|
|
21ddd0a820 | ||
|
|
cbcf393332 | ||
|
|
3992eb0542 | ||
|
|
2ab605d232 | ||
|
|
2608ceca05 | ||
|
|
2723144678 | ||
|
|
99544f4655 | ||
|
|
191ff93822 | ||
|
|
aa3b0de1f5 | ||
|
|
0744355a67 | ||
|
|
d1ef8ea90b | ||
|
|
483bc3f549 | ||
|
|
45eb1a6ddb | ||
|
|
e932cbe162 | ||
|
|
8bbc40beb2 | ||
|
|
5eff06e809 | ||
|
|
39479b8d93 | ||
|
|
7127e36217 | ||
|
|
501b356b2b | ||
|
|
9473272512 | ||
|
|
6e03bb1c2c | ||
|
|
284d49b34b | ||
|
|
1e8c842444 | ||
|
|
929d399995 | ||
|
|
c8bfa43f22 | ||
|
|
c70922f3db | ||
|
|
d15b83b669 | ||
|
|
03993072c5 | ||
|
|
bffcc52536 | ||
|
|
3282a8d27c | ||
|
|
4be5e0bfa1 | ||
|
|
c384ec132f | ||
|
|
9c49ee3dd3 | ||
|
|
bee70ee72b | ||
|
|
e78bbf3492 | ||
|
|
eb497f2b9f | ||
|
|
d35d9b60ff | ||
|
|
57f1ef05c7 | ||
|
|
06158de6da | ||
|
|
ecfeb01aad | ||
|
|
4dc07d6e60 | ||
|
|
22ef78333d | ||
|
|
c33d9cbc6e | ||
|
|
5029c12de1 | ||
|
|
a3a7183293 | ||
|
|
de357b6193 | ||
|
|
16b116ee83 | ||
|
|
a46f83364e | ||
|
|
6793b34fb5 | ||
|
|
71d789063e | ||
|
|
3fbc2c8036 | ||
|
|
e19a981b8c | ||
|
|
ce2e01bcd7 | ||
|
|
adc2c12a67 | ||
|
|
215a58afad | ||
|
|
65314e0c90 | ||
|
|
a33caaaaaa | ||
|
|
28a29d071f | ||
|
|
93b7b5a8e7 | ||
|
|
c52936855e | ||
|
|
917da8cff3 | ||
|
|
0891245b49 | ||
|
|
f227bb4d9c | ||
|
|
9e500bc897 | ||
|
|
994b88f898 | ||
|
|
067189c355 | ||
|
|
d9e97ce772 | ||
|
|
1154343a20 | ||
|
|
de5f8df9d3 | ||
|
|
940b598cf8 | ||
|
|
7ce915c4f6 | ||
|
|
5955efabb0 | ||
|
|
8d149c59f8 | ||
|
|
54eb75b063 | ||
|
|
ff27656533 | ||
|
|
71102e3c6d | ||
|
|
85a10f292b | ||
|
|
e76b8ae423 | ||
|
|
ce0d8d9fb7 | ||
|
|
aace9bc76e | ||
|
|
afac7760ce | ||
|
|
47b8385c5a | ||
|
|
e22ece8e79 | ||
|
|
54a029356d | ||
|
|
4f3934ae6a | ||
|
|
898a10ef82 | ||
|
|
39ce605711 | ||
|
|
734ee0b001 | ||
|
|
bdba77c0f5 | ||
|
|
74a79a6921 | ||
|
|
e0d7ffe754 | ||
|
|
7dff7f3e1a | ||
|
|
7ceb4005cd | ||
|
|
2b304581cf | ||
|
|
1bcde1f2ff | ||
|
|
45a9878193 | ||
|
|
d3b60f913d | ||
|
|
1ddd395aad | ||
|
|
9e8b68b43d | ||
|
|
0b900c0a3c | ||
|
|
64b8e23458 | ||
|
|
08101061fe | ||
|
|
abbf003c24 | ||
|
|
18b2e3e5be | ||
|
|
2f35c785ee | ||
|
|
5b96809f6f | ||
|
|
dc0f2db3f2 | ||
|
|
ee22cf3b0c | ||
|
|
d470202bca | ||
|
|
964d65fd3b | ||
|
|
f7341141cf | ||
|
|
97a5957adc | ||
|
|
ae1155852a | ||
|
|
f38464cf55 | ||
|
|
d052e4c79d | ||
|
|
fa2559b3f0 | ||
|
|
670a292416 | ||
|
|
1151ec1328 | ||
|
|
619523b43e | ||
|
|
41972f62a3 | ||
|
|
c2c234cf5a | ||
|
|
e9c13b254b | ||
|
|
e376866436 | ||
|
|
4eb78f5c86 | ||
|
|
fad7e72fd4 | ||
|
|
dd4c2cf9fa | ||
|
|
883264c0df | ||
|
|
3a302c18bc | ||
|
|
665c6d6699 | ||
|
|
8c1e53f65a | ||
|
|
f9ff6f4161 | ||
|
|
f6663b187a | ||
|
|
ad3cac8578 | ||
|
|
23de7cb294 | ||
|
|
03c3a3e7c7 | ||
|
|
992fba7e07 | ||
|
|
ba9b35dc3b | ||
|
|
c8ff8540f9 | ||
|
|
d10ef8c501 | ||
|
|
b166451750 | ||
|
|
1fdae70cf8 | ||
|
|
7ea9c4baf2 | ||
|
|
7d57a2ea6d | ||
|
|
3f3378e13a | ||
|
|
884de148c9 | ||
|
|
21c168156c | ||
|
|
29d7639c38 | ||
|
|
ce15ad763f | ||
|
|
ef4c690c32 | ||
|
|
864d86e8b6 | ||
|
|
bcbf8edd8a | ||
|
|
8e11cd5ebb | ||
|
|
1c8aece53c | ||
|
|
2195e032a6 | ||
|
|
e330b96b3d | ||
|
|
0790e92cad | ||
|
|
9d5d116014 | ||
|
|
a30a0b852f | ||
|
|
6ac970a93f | ||
|
|
8f509dd41a | ||
|
|
dafecf2a19 | ||
|
|
8a7913c095 | ||
|
|
e9f93f5bc7 | ||
|
|
03b5dfacd6 | ||
|
|
09388f3c99 | ||
|
|
5a774c82cc | ||
|
|
f0ade07be8 | ||
|
|
2dc07fca0b | ||
|
|
94d0232ed3 | ||
|
|
50c66174dd | ||
|
|
2f8e817e16 | ||
|
|
d84495d0e1 | ||
|
|
fdd50840a7 | ||
|
|
91bb392678 | ||
|
|
507b5e3a46 | ||
|
|
412d252eea | ||
|
|
dee808dd1b | ||
|
|
2543bb5e63 | ||
|
|
0de39a1d33 | ||
|
|
281f13f4c3 | ||
|
|
8cbd209ede | ||
|
|
b9a4446fc1 | ||
|
|
8caf6b02c6 | ||
|
|
16aca7e6c8 | ||
|
|
ad4387a0c0 | ||
|
|
8b56dd5468 | ||
|
|
70818b49b9 | ||
|
|
5b29e65bc5 | ||
|
|
211dfd7f7e | ||
|
|
2da6c6b6f5 | ||
|
|
b97b3700c3 | ||
|
|
2ca6b6f2fe | ||
|
|
15454d2253 | ||
|
|
eab3adfa1d | ||
|
|
6fda74e281 | ||
|
|
a47c7d467f | ||
|
|
f13413922d | ||
|
|
322e39d9ad | ||
|
|
468d3d1077 | ||
|
|
aa7e5b87c4 | ||
|
|
bcaa07cdb4 | ||
|
|
5dc20aaef3 | ||
|
|
14b9124a03 | ||
|
|
e40f0c4f2e | ||
|
|
04eaf358d0 | ||
|
|
6727cc2da4 | ||
|
|
3d8ebf9157 | ||
|
|
c87d8f017f | ||
|
|
57819055da | ||
|
|
1b5879e176 | ||
|
|
1d74c400ac | ||
|
|
f64e588094 | ||
|
|
d896e51c5d | ||
|
|
9c56626bdb | ||
|
|
74e55f584c | ||
|
|
bb39913339 | ||
|
|
d99129f0e4 | ||
|
|
5ad1023088 | ||
|
|
bdd6dc625e | ||
|
|
88b2b04b0d | ||
|
|
fb801fa484 | ||
|
|
9d94cdbb52 | ||
|
|
3aed0ba150 | ||
|
|
b69bf49082 | ||
|
|
ad46f981f9 | ||
|
|
222d3d661e | ||
|
|
d4e490f47e | ||
|
|
bb043f96ee | ||
|
|
e3acb635a3 | ||
|
|
32aa23822b | ||
|
|
3350a78cd8 | ||
|
|
73fda0b188 | ||
|
|
476b809c4d | ||
|
|
c5f1128209 | ||
|
|
aa7851d1d7 | ||
|
|
707117c04f | ||
|
|
979f41efd1 | ||
|
|
e7e3d1105c | ||
|
|
47ed3d01af | ||
|
|
2e75990b6c | ||
|
|
e132849fe2 | ||
|
|
a7ccb3e811 | ||
|
|
95c9259193 | ||
|
|
50a5cce985 | ||
|
|
0fdb5e6592 | ||
|
|
93f255e95b | ||
|
|
654dc2b901 | ||
|
|
4390aa1015 | ||
|
|
f4b06ed1fc | ||
|
|
8b2b2f650f | ||
|
|
bb4b08e8cf | ||
|
|
47b8f052bd | ||
|
|
611bfe49df | ||
|
|
8014fce6e1 | ||
|
|
38116b88a5 | ||
|
|
b74caae685 | ||
|
|
a40ba8f4e9 | ||
|
|
035bb2236d | ||
|
|
8fa0a4dc9e | ||
|
|
4634c5cdd3 | ||
|
|
2175fa6701 | ||
|
|
83d6566020 | ||
|
|
d6e633edd0 | ||
|
|
adc60bf394 | ||
|
|
abd00e28b8 | ||
|
|
70605fc8ed | ||
|
|
5da60b53d0 | ||
|
|
3f00f3f605 | ||
|
|
800f740fd5 | ||
|
|
9488ee0f9e | ||
|
|
f43755625c | ||
|
|
6266d482ae | ||
|
|
dba594a857 | ||
|
|
5ae2c415b7 | ||
|
|
6b831173e4 | ||
|
|
c2d84fa288 | ||
|
|
78a56640c3 | ||
|
|
368caddd00 | ||
|
|
51659ed123 | ||
|
|
21506386c4 | ||
|
|
3e583b1373 | ||
|
|
c87c464f23 | ||
|
|
747f26ef74 | ||
|
|
170aeea864 | ||
|
|
14ec246659 | ||
|
|
9a77a59cb2 | ||
|
|
0ff87d8f40 | ||
|
|
ffb1f7d465 | ||
|
|
d5cd9d0221 | ||
|
|
80fdf7517a | ||
|
|
09e8ba1208 | ||
|
|
a4a20ddc42 | ||
|
|
b05c2f5327 | ||
|
|
865593146a | ||
|
|
94276c51ac | ||
|
|
e48095062b | ||
|
|
afb9bdce86 | ||
|
|
421f0895b7 | ||
|
|
ea00efa857 | ||
|
|
b157d4c161 | ||
|
|
c78e11872c | ||
|
|
1f3b6d01ab | ||
|
|
0186586fd9 | ||
|
|
91ad77d86a | ||
|
|
ff4b4fd9d9 | ||
|
|
7a6bc6f554 | ||
|
|
01d7fefe52 | ||
|
|
47cfb4ae35 | ||
|
|
5f22e35c26 | ||
|
|
b937ad101f | ||
|
|
c79c052528 | ||
|
|
a5bff2f7e5 | ||
|
|
db9557fc84 | ||
|
|
c2dd3913d7 | ||
|
|
9fc76990e1 | ||
|
|
366153a94f | ||
|
|
c54dc10464 | ||
|
|
55f582faaf | ||
|
|
dd0fb49098 | ||
|
|
b0a58d4664 | ||
|
|
5db956f546 | ||
|
|
f98dfd47fc | ||
|
|
3b85c51e5f | ||
|
|
1f3dc927c0 | ||
|
|
5437976e65 | ||
|
|
0508586108 | ||
|
|
5233f8d4ad | ||
|
|
c43f5e14f2 | ||
|
|
2d25e969e9 | ||
|
|
a2c4f8c2d1 | ||
|
|
b389ccbb38 | ||
|
|
1bbece649f | ||
|
|
e4ec666479 | ||
|
|
a2187565d1 | ||
|
|
aa8b84aa6c | ||
|
|
b8647f968a | ||
|
|
a074d18dfa | ||
|
|
5fd5091efd | ||
|
|
fd027c65e7 | ||
|
|
3398c969ac | ||
|
|
ef56c7fa7c | ||
|
|
078014826b | ||
|
|
0794cf56ff | ||
|
|
a0f7cb41c3 | ||
|
|
de18dd46b6 | ||
|
|
633902cce5 | ||
|
|
58cf97ef5d | ||
|
|
3ef76df0d1 | ||
|
|
c4bddf59e2 | ||
|
|
6c76c8098b | ||
|
|
0a8287c72d | ||
|
|
2c11418d33 | ||
|
|
f9cf6df3ad | ||
|
|
5bc01124c2 | ||
|
|
ca66dc9ba0 | ||
|
|
ac37898331 | ||
|
|
126a587aa3 | ||
|
|
61828070c5 | ||
|
|
f8231bb109 | ||
|
|
895e06d778 | ||
|
|
f6400880b0 | ||
|
|
bcfd32e49f | ||
|
|
e51844eba3 | ||
|
|
ff9fea0abd | ||
|
|
a6c0ac4273 | ||
|
|
14ec7ea6cd | ||
|
|
497e73182a | ||
|
|
072fed288a | ||
|
|
47d56f6259 | ||
|
|
f0c7a751c1 | ||
|
|
9d2c03bc5b | ||
|
|
a2385a8b24 | ||
|
|
0e1c9e2cd1 | ||
|
|
0791871955 | ||
|
|
1e965edb54 | ||
|
|
a339d397ce | ||
|
|
981f1e2e32 | ||
|
|
ac9f12a5f6 | ||
|
|
010376518f | ||
|
|
4201ebb28d | ||
|
|
241f59b59f | ||
|
|
554366ad9f | ||
|
|
d2ebc58be3 | ||
|
|
0f42dc2e4c | ||
|
|
3177865fc2 | ||
|
|
a715df6338 | ||
|
|
6dd765c101 | ||
|
|
bae08c27c8 | ||
|
|
436bd6b9ff | ||
|
|
643c0867ed | ||
|
|
2cd789662b | ||
|
|
08c58fe8e3 | ||
|
|
25346c7a55 | ||
|
|
450229f5e4 | ||
|
|
3a1f980a36 | ||
|
|
3e2260bdcb | ||
|
|
d300d17f9e | ||
|
|
c3abcd9dc1 | ||
|
|
bb338a1c97 | ||
|
|
a1bad4b7be | ||
|
|
0551cd8e44 | ||
|
|
3f01d3bcb1 | ||
|
|
006919ec55 | ||
|
|
5f4145ef0a | ||
|
|
b353e8752d | ||
|
|
dc236a27cc | ||
|
|
b05625d444 | ||
|
|
51d5ef5b4e | ||
|
|
02e1a7669c | ||
|
|
a34cfacb5c | ||
|
|
fe98921e4c | ||
|
|
f59f4f266e | ||
|
|
2a06f1545b | ||
|
|
1da1d31e1c | ||
|
|
a7fffe7e13 | ||
|
|
09cf160088 | ||
|
|
1c0ec60b23 | ||
|
|
6d08f0a196 | ||
|
|
2e418f24fa | ||
|
|
b69aaf04e9 | ||
|
|
34b512d04b | ||
|
|
8d9a611874 | ||
|
|
69c5e667ae | ||
|
|
fc11537e12 | ||
|
|
54870e128c | ||
|
|
7d47e78025 | ||
|
|
7fca642c3d | ||
|
|
b7f32c2a4c | ||
|
|
29dfb151cb | ||
|
|
fbc7a1e452 | ||
|
|
b91c88cdf6 | ||
|
|
2ad3f9b51f | ||
|
|
f1f4f48c38 | ||
|
|
b19d5731ac | ||
|
|
a6468b6b6e | ||
|
|
eba427cc5b | ||
|
|
6d435e08c8 | ||
|
|
38bfa7bdb6 | ||
|
|
8c260c9d12 | ||
|
|
268af5b67c | ||
|
|
78a0773f38 | ||
|
|
e02e595eec | ||
|
|
aae161e31a | ||
|
|
bda5c1d13e | ||
|
|
f2b044438d | ||
|
|
3711a66592 | ||
|
|
27c125f23b | ||
|
|
3f236b7e91 | ||
|
|
a11b52d94f | ||
|
|
9f15d402de | ||
|
|
b016d9e295 | ||
|
|
430aa5564d | ||
|
|
252e02ba2e | ||
|
|
700f189921 | ||
|
|
562e675a0e | ||
|
|
50d9932378 | ||
|
|
2b8cf5d132 | ||
|
|
22e9135ab1 | ||
|
|
16686739f6 | ||
|
|
e7f4f58886 | ||
|
|
387fdb30b0 | ||
|
|
9f0a6b8231 | ||
|
|
5dd70c4306 | ||
|
|
d3c278cb02 | ||
|
|
39cb9f4a32 | ||
|
|
b2c2bfc2aa | ||
|
|
2303f18d39 | ||
|
|
59a49f0698 | ||
|
|
fe2dc8c489 | ||
|
|
327e30fe7c | ||
|
|
d204742bb8 | ||
|
|
2dd5912fee | ||
|
|
6392e4d38d | ||
|
|
c8d1f4d3d6 | ||
|
|
0ec89e9aae | ||
|
|
b2a8b53efa | ||
|
|
0ba0125eb5 | ||
|
|
a8e8a4a167 | ||
|
|
9e8089ec1a | ||
|
|
b6ce693ede | ||
|
|
0f2095f84a | ||
|
|
57779a8ed9 | ||
|
|
d08f3a6ea0 | ||
|
|
b7e56d3030 | ||
|
|
b957087456 | ||
|
|
eb48d24182 | ||
|
|
0ad24f6485 | ||
|
|
44096dfcf2 | ||
|
|
c1ed44bd35 | ||
|
|
11aedf2b27 | ||
|
|
6fe5d0575f | ||
|
|
17a5faa5e0 | ||
|
|
f62df6125f | ||
|
|
30771bf7cb | ||
|
|
02cf838ceb | ||
|
|
ddfd755ddc | ||
|
|
99b09290b7 | ||
|
|
bb978b0eff | ||
|
|
36ddd0b98b | ||
|
|
48e4d33bea | ||
|
|
9b8eb69886 | ||
|
|
d43461584a | ||
|
|
2e1aa844fe | ||
|
|
c23f4b2390 | ||
|
|
78eb774822 | ||
|
|
9837e06816 | ||
|
|
013bb5a4f2 | ||
|
|
078109eca2 | ||
|
|
86aad15e0c | ||
|
|
28db0f6227 | ||
|
|
8dba099a56 | ||
|
|
22cae56ab1 | ||
|
|
fd3b9ca1e5 | ||
|
|
17009b315f | ||
|
|
86ca85f39d | ||
|
|
058369bc7a | ||
|
|
5249eb4428 | ||
|
|
5675044443 | ||
|
|
d065d28244 | ||
|
|
78ea40d70c | ||
|
|
efbf13fcd4 | ||
|
|
45b8622bc9 | ||
|
|
4b78559228 | ||
|
|
31a1517d2f | ||
|
|
720ed0460b | ||
|
|
992f4c8c2d | ||
|
|
4c381e13e9 | ||
|
|
52a853b26f | ||
|
|
39923e942b | ||
|
|
27be025805 | ||
|
|
a466362938 | ||
|
|
51bae7855d | ||
|
|
089681267f | ||
|
|
10ca8b5440 | ||
|
|
2a54f2df5d | ||
|
|
d495431f24 | ||
|
|
58bb5d40c5 | ||
|
|
5ec1bd0f5e | ||
|
|
55be253fb2 | ||
|
|
c098a03d8f | ||
|
|
e58b9e870e | ||
|
|
7018caadb9 | ||
|
|
696c351e6a | ||
|
|
e6b0bfc90d | ||
|
|
ef96ecd9b8 | ||
|
|
2db117a7ac | ||
|
|
3de4283bd2 | ||
|
|
6a96b5f1dc | ||
|
|
40281ce2a1 | ||
|
|
db4971e6c0 | ||
|
|
c333606600 | ||
|
|
a49638e9a6 | ||
|
|
6d5bccdd51 | ||
|
|
ca8156efdf | ||
|
|
4f417854e5 | ||
|
|
b091c0d4a8 | ||
|
|
7ac03be43b | ||
|
|
3e134cdce5 | ||
|
|
79db7f4457 | ||
|
|
53d0b91f26 | ||
|
|
39da6c7c19 | ||
|
|
add5b29d03 | ||
|
|
5a3dedea97 | ||
|
|
033433b883 | ||
|
|
a05ec0eed2 | ||
|
|
ce449ac0e2 | ||
|
|
ea507289c6 | ||
|
|
cda285ceb7 | ||
|
|
582e46005e | ||
|
|
f114ec3f80 | ||
|
|
3e0e120222 | ||
|
|
383918bca4 | ||
|
|
bf52883331 | ||
|
|
c515a53986 | ||
|
|
c999a74d34 | ||
|
|
d47e6819a8 | ||
|
|
954c43912d | ||
|
|
2417f63bf2 | ||
|
|
43ab2540b0 | ||
|
|
eae664bd1b | ||
|
|
c603eaa1b6 | ||
|
|
4c92a2619f | ||
|
|
1999352004 | ||
|
|
4068ab30e8 | ||
|
|
f45276a596 | ||
|
|
a7cedddcef | ||
|
|
1c23d7e67a | ||
|
|
96a330e034 | ||
|
|
f248159d55 | ||
|
|
9672dba765 | ||
|
|
57e14c9e5c | ||
|
|
4626ff4e93 | ||
|
|
13b5ac92d8 | ||
|
|
4ae13ff3dd | ||
|
|
d2a53b79d7 | ||
|
|
8aeba9dc79 | ||
|
|
159cf8c477 | ||
|
|
d262ca53ea | ||
|
|
c32d9c74b1 | ||
|
|
8a81d2210c | ||
|
|
6c0c508280 | ||
|
|
59e859e59a | ||
|
|
fe32385a12 | ||
|
|
000666a5e2 | ||
|
|
df7f087b08 | ||
|
|
a2bf5ac199 | ||
|
|
e11bf1d62d | ||
|
|
6444051382 | ||
|
|
92e2597192 | ||
|
|
1f4dbd1b2e | ||
|
|
a48e804e96 | ||
|
|
ac84e77333 | ||
|
|
4e4ee2ff2c | ||
|
|
77d30adee9 | ||
|
|
6daa429b69 | ||
|
|
4384b812f1 | ||
|
|
335866be88 | ||
|
|
adb45ed5df | ||
|
|
2f674833b2 | ||
|
|
eb5f7a27e7 | ||
|
|
b89874850f | ||
|
|
ead597bacc | ||
|
|
452341059b | ||
|
|
693db74380 | ||
|
|
377ed48416 | ||
|
|
aeb3bc410b | ||
|
|
c06ea4a3e8 | ||
|
|
8e1333468d | ||
|
|
e785a57c65 | ||
|
|
b8cc3f59d6 | ||
|
|
0776dddc35 | ||
|
|
42ccf332b9 | ||
|
|
20a7bf2b23 | ||
|
|
a0cdaa8c0b | ||
|
|
1bb7d05ba0 | ||
|
|
5046664b8b | ||
|
|
4631d884d6 | ||
|
|
c7afda2562 | ||
|
|
52132b9a29 | ||
|
|
e30161c34e | ||
|
|
c6bb08355c | ||
|
|
2671245d41 | ||
|
|
4dee706143 | ||
|
|
1da62c2190 | ||
|
|
a6deabaeee | ||
|
|
ff37ab7bc6 | ||
|
|
127224acc6 | ||
|
|
14c896573d | ||
|
|
2ffaa88c9e | ||
|
|
6c95ab4d1e | ||
|
|
c83012a83a | ||
|
|
09d54cf9d9 | ||
|
|
e7240cb77d | ||
|
|
0fe606dd8b | ||
|
|
3c22134f78 | ||
|
|
9668234903 | ||
|
|
d25614d50f | ||
|
|
f945278959 | ||
|
|
2b584a94c5 | ||
|
|
71bf352af2 | ||
|
|
fd363d7d85 | ||
|
|
31dc44feee | ||
|
|
50123298a7 | ||
|
|
aa0bf47ec3 | ||
|
|
0df309b45c | ||
|
|
abd44069ae | ||
|
|
0b3431289f | ||
|
|
fdc3b01054 | ||
|
|
c36ed4b8b8 | ||
|
|
8f9ba5d54a | ||
|
|
c23984dd6a | ||
|
|
5887c9d12c | ||
|
|
e93b574c5d | ||
|
|
e9a32d5dc7 | ||
|
|
82f463724c | ||
|
|
83316f1299 | ||
|
|
00dd46b521 | ||
|
|
d1ff99d6e3 | ||
|
|
8d5755521e | ||
|
|
de3a48f958 | ||
|
|
452d571e79 | ||
|
|
58bf6f8fad | ||
|
|
07756476b0 | ||
|
|
39361f71a1 | ||
|
|
9b53792f20 | ||
|
|
2589b45af0 | ||
|
|
bc14ca86f3 | ||
|
|
c18584fdbc | ||
|
|
4843a1d458 | ||
|
|
90f5ae536c | ||
|
|
6cc4de8d28 | ||
|
|
935eb4cc61 | ||
|
|
aed91a4d09 | ||
|
|
bdc86f5338 | ||
|
|
0d112e20a5 | ||
|
|
3412d2232f | ||
|
|
7e4088ed61 | ||
|
|
4fac1517d9 | ||
|
|
7947087694 | ||
|
|
71a11554cb | ||
|
|
49a0072bab | ||
|
|
5f48f8a3aa | ||
|
|
fea3444625 | ||
|
|
4fa049188b | ||
|
|
e438e1c53d | ||
|
|
c5e8739435 | ||
|
|
8aa70211c0 | ||
|
|
bc18862dc6 | ||
|
|
7dd929a39e | ||
|
|
2e5ce52086 | ||
|
|
17e7add105 | ||
|
|
710a2a64e4 | ||
|
|
4346620afa | ||
|
|
fd8a9465d4 | ||
|
|
9ae71ac4ee | ||
|
|
5f6b3d2cd5 | ||
|
|
474383b656 | ||
|
|
2df6f9068a | ||
|
|
be5793987e | ||
|
|
9d718eb1e8 | ||
|
|
374ddbe2a6 | ||
|
|
8173f24515 | ||
|
|
a66f4e0614 | ||
|
|
27466fc56b | ||
|
|
852e37c8dd | ||
|
|
8288f7c6b7 | ||
|
|
c99537c402 | ||
|
|
56dce646cc | ||
|
|
4ddafdeeaf | ||
|
|
7c506d5426 | ||
|
|
75312c7bb6 | ||
|
|
a35795f793 | ||
|
|
4511a4de8c | ||
|
|
d04e8ab1d0 | ||
|
|
847069a59d | ||
|
|
26cee8acf0 | ||
|
|
ef05570540 | ||
|
|
03ff08a934 | ||
|
|
d1fcf80c2d | ||
|
|
c0f49c648b | ||
|
|
4f052a6b9c | ||
|
|
14a068062e | ||
|
|
02c99fca67 | ||
|
|
890cec9872 | ||
|
|
9231261d73 | ||
|
|
fe80a7ed46 | ||
|
|
74aa3f861e | ||
|
|
98f31b3d43 | ||
|
|
8f36ee2c89 | ||
|
|
eb7e8162ff | ||
|
|
abf8c42a7c | ||
|
|
7d0dac9ae2 | ||
|
|
4cf520db93 | ||
|
|
d6c0720f8a | ||
|
|
af58649113 | ||
|
|
ca894be51d | ||
|
|
c5b2d14f8c | ||
|
|
2b7f5091f1 | ||
|
|
3402d16548 | ||
|
|
85ffe0ea04 | ||
|
|
ece32b47df | ||
|
|
4695f60937 | ||
|
|
9c9e854005 | ||
|
|
d60641cac4 | ||
|
|
e6200899de | ||
|
|
9e1c24f93e | ||
|
|
2e3b13ced5 | ||
|
|
d63a15a570 | ||
|
|
d150e688f5 | ||
|
|
00d91cb9ab | ||
|
|
085e1fd43f | ||
|
|
adbde78a7a | ||
|
|
24cd73e366 | ||
|
|
0fa1d25d98 | ||
|
|
462cfe2fc1 | ||
|
|
37d9a20b4a | ||
|
|
7f5e23f2ac | ||
|
|
625d380f7a | ||
|
|
759fd15c03 | ||
|
|
02d200878d | ||
|
|
96034bca3e | ||
|
|
6968a42abe | ||
|
|
bd34c24b84 | ||
|
|
31d2bc1740 | ||
|
|
358de714c8 | ||
|
|
2ec3813cb3 | ||
|
|
5e2390604d | ||
|
|
a650717c1d | ||
|
|
6b3f47f675 | ||
|
|
90277953c7 | ||
|
|
26b13abac8 | ||
|
|
0a02cf32be | ||
|
|
6532f622cb | ||
|
|
096e8cd8ae | ||
|
|
d0930e9053 | ||
|
|
4273939cf5 | ||
|
|
d4f9810006 | ||
|
|
4502858407 | ||
|
|
64d6caf695 | ||
|
|
b425374cb7 | ||
|
|
597911e5a8 | ||
|
|
e270603117 | ||
|
|
ca39c9877e | ||
|
|
437771ea85 | ||
|
|
054a6f8563 | ||
|
|
1ee80e0df5 | ||
|
|
3b0ed5337b | ||
|
|
13febeb902 | ||
|
|
baf8e6fe80 | ||
|
|
23fe3e623d | ||
|
|
9e9c7db5b1 | ||
|
|
f186540c4f | ||
|
|
9b9474ada1 | ||
|
|
28d0f387ea | ||
|
|
ad77b4f73b | ||
|
|
1f671fbacc | ||
|
|
539ccf5a61 | ||
|
|
ba0715b295 | ||
|
|
ecc9369da2 | ||
|
|
4bf0af321f | ||
|
|
217c16de20 | ||
|
|
100afe304e | ||
|
|
c5dbd749e7 | ||
|
|
54573fb970 | ||
|
|
d96fcc95c2 | ||
|
|
3bd5da9896 | ||
|
|
a7d5f38d51 | ||
|
|
b138db9c4d | ||
|
|
b26f642c6e | ||
|
|
d4f99e4459 | ||
|
|
9b1fe23b03 | ||
|
|
0b82902618 | ||
|
|
b4d8cdd714 | ||
|
|
25f6c5f884 | ||
|
|
3140dbb34a | ||
|
|
c870b10387 | ||
|
|
26584b00c3 | ||
|
|
ebf2c3ad5b | ||
|
|
4d892cbae9 | ||
|
|
232379c72c | ||
|
|
c03b8b72da | ||
|
|
3bb0f33dcc | ||
|
|
c0a2e9b3f7 | ||
|
|
cc0f1f514c | ||
|
|
f9b19631c0 | ||
|
|
51e04673c6 | ||
|
|
8e825d91e0 | ||
|
|
39d24d0d4a | ||
|
|
5c6bfcbeee | ||
|
|
354115ef9b | ||
|
|
1492536bdb | ||
|
|
eb79507501 | ||
|
|
3b345aab37 | ||
|
|
cc805b7b74 | ||
|
|
fa1747bdb2 | ||
|
|
e88df069bd | ||
|
|
925909aa3a | ||
|
|
1655bf284f | ||
|
|
838c0aaaa9 | ||
|
|
f562ff780c | ||
|
|
88f3e554da | ||
|
|
5227bc3549 | ||
|
|
6cab3dcd3a | ||
|
|
00ab76fa19 | ||
|
|
f3bb4030f6 | ||
|
|
e0ec98d766 | ||
|
|
2c8d519d70 | ||
|
|
0d2e7f72a7 | ||
|
|
bfdebc5775 | ||
|
|
86644ecda0 | ||
|
|
caace1ce11 | ||
|
|
5a995d804b | ||
|
|
06cd57fe6e | ||
|
|
d80e7cc025 | ||
|
|
eff040dca6 | ||
|
|
ad3aa73085 | ||
|
|
f149f353eb | ||
|
|
abfa4c65df | ||
|
|
6f4a10be04 | ||
|
|
76afef9117 | ||
|
|
ae0392a0e5 | ||
|
|
6e7d5fecb4 | ||
|
|
d0c2a26ced | ||
|
|
ffdd5290bf | ||
|
|
4fc675814a | ||
|
|
53e5eb749d | ||
|
|
99f2734d28 | ||
|
|
78c2f68649 | ||
|
|
28d749982a | ||
|
|
e524f4a050 | ||
|
|
99613291fb | ||
|
|
c658f43bc3 | ||
|
|
ec5c7cf8a7 | ||
|
|
817e052457 | ||
|
|
145514687c | ||
|
|
64cfad317d | ||
|
|
f5e6029a51 | ||
|
|
aaa163a140 | ||
|
|
642fd1e8da | ||
|
|
edae590b2c | ||
|
|
4c7f698946 | ||
|
|
f58c96e4b3 | ||
|
|
dd650e08ec | ||
|
|
6f554d7f00 | ||
|
|
068e752aa7 | ||
|
|
db408df395 | ||
|
|
7425cd7112 | ||
|
|
1bad607e6c | ||
|
|
8624bb95ba | ||
|
|
9556878b23 | ||
|
|
9fc9f71666 | ||
|
|
7a6c96a989 | ||
|
|
0bb6162a01 | ||
|
|
9aecd43f24 | ||
|
|
8b0da6f508 | ||
|
|
1f68b739af | ||
|
|
eca27b9d12 | ||
|
|
e9be4934c4 | ||
|
|
b1342a10b0 | ||
|
|
b418c55e89 | ||
|
|
182efc0b04 | ||
|
|
8627a31912 | ||
|
|
ea4b64909f | ||
|
|
b55fa19f72 | ||
|
|
555203c46a | ||
|
|
024182b6a0 | ||
|
|
d9eb04f68e | ||
|
|
d680fa6cd6 | ||
|
|
a84eec1aef | ||
|
|
f86fbe7fd5 | ||
|
|
5f91b48e0c | ||
|
|
4a6bc284af | ||
|
|
ec8c661b38 | ||
|
|
87989a5f09 | ||
|
|
289637384d | ||
|
|
1475635382 | ||
|
|
f017b27ae2 | ||
|
|
3050f19e0b | ||
|
|
36fb1f96e2 | ||
|
|
8d5f4d7363 | ||
|
|
9a5104dacf | ||
|
|
c04824890d | ||
|
|
a218ca4412 | ||
|
|
7b0483dc16 | ||
|
|
6946c1b205 | ||
|
|
6e0c238f71 | ||
|
|
e7ce7282a1 | ||
|
|
212b497f1f | ||
|
|
022528bb47 | ||
|
|
ac9479aa3f | ||
|
|
c1ba647f5b | ||
|
|
61d71d8716 | ||
|
|
ff56f1d2f8 | ||
|
|
1915a9e5f6 | ||
|
|
3e45e4abd9 | ||
|
|
f2a226407f | ||
|
|
3aa427fdd6 | ||
|
|
0e79b3ab27 | ||
|
|
35a2baf5ef | ||
|
|
03656a4b0f | ||
|
|
885fa16373 | ||
|
|
ced2d52043 | ||
|
|
536e3e3da3 | ||
|
|
3ffbb56fa0 | ||
|
|
aa94df0845 | ||
|
|
ab2fbd6164 | ||
|
|
13370dc01c | ||
|
|
ae5c446652 | ||
|
|
7c49f4e816 | ||
|
|
a90004b983 | ||
|
|
2377911313 | ||
|
|
c43bd2bfe2 | ||
|
|
ad3f37aec6 | ||
|
|
cf07e2564d | ||
|
|
443771ecff | ||
|
|
f4fdd0221b | ||
|
|
1c1e5d5ee7 | ||
|
|
e31b529b4a | ||
|
|
23601904cd | ||
|
|
54a767bf81 | ||
|
|
9420234a98 | ||
|
|
3010c7ce63 | ||
|
|
ea424a4c8d | ||
|
|
e5ae1ea2cc | ||
|
|
6de489a34f | ||
|
|
6358b4588d | ||
|
|
6b3165d3cc | ||
|
|
38e7d12b0b | ||
|
|
7d78923967 | ||
|
|
6f31eb2a9d | ||
|
|
46dc85c10c | ||
|
|
55d03cb03e | ||
|
|
77a6039e54 | ||
|
|
cb501298b5 | ||
|
|
25082b2bec | ||
|
|
d272e19bfa | ||
|
|
91f86ce5b5 | ||
|
|
4f8eb53e8b | ||
|
|
3ecad1095a | ||
|
|
727ac634a9 | ||
|
|
5f48b08215 | ||
|
|
2f718b57c1 | ||
|
|
fa3811ef61 | ||
|
|
3a460d3183 | ||
|
|
8b56386ccd | ||
|
|
ede90710e5 | ||
|
|
f3d2ca29dd | ||
|
|
ed5f534bbb | ||
|
|
1d97efa059 | ||
|
|
24727175d7 | ||
|
|
9e76ddb9fd | ||
|
|
36942a5ddb | ||
|
|
0ff6d6364b | ||
|
|
bb121b29e3 | ||
|
|
17afcbce4f | ||
|
|
086c9ce4c8 | ||
|
|
cdede08ad6 | ||
|
|
46ed2fb802 | ||
|
|
e083b2773a | ||
|
|
870c9c734f | ||
|
|
46f3390365 | ||
|
|
33e7d175d4 | ||
|
|
10f4dbef72 | ||
|
|
6c6b78bcbd | ||
|
|
d1917e1856 | ||
|
|
ce8f10f2b1 | ||
|
|
2b46e63c49 | ||
|
|
115c66324d | ||
|
|
1032eb95d2 | ||
|
|
ee83c89fae | ||
|
|
f7f0ec0cec | ||
|
|
ef9bcb6696 | ||
|
|
65d146ddca | ||
|
|
02e3f45ed2 | ||
|
|
aba221dbb4 | ||
|
|
e8db86d092 | ||
|
|
30dbf48fda | ||
|
|
a4d12c26c7 | ||
|
|
14e096d052 | ||
|
|
c5ce93136f | ||
|
|
0137404449 | ||
|
|
5f37faa6ad | ||
|
|
ab57bfbe73 | ||
|
|
ced47f227c | ||
|
|
95af214abe | ||
|
|
ccd20af7a1 | ||
|
|
333f8a0da0 | ||
|
|
4eaa3d90dd | ||
|
|
4ef2ce4622 | ||
|
|
d4f4b73d56 | ||
|
|
bb69b5fae2 | ||
|
|
9236f0eb8c | ||
|
|
129b80ef0f | ||
|
|
0ba95a5b29 | ||
|
|
b9cdf2cbb7 | ||
|
|
399b87fecc | ||
|
|
68ff33d0ba | ||
|
|
e8fcc250a1 | ||
|
|
208d155b96 | ||
|
|
cb21fe9eb6 | ||
|
|
0507460a16 | ||
|
|
9cf7629e9b | ||
|
|
7ac47ab6bb | ||
|
|
db3e802643 | ||
|
|
712403e9fd | ||
|
|
5148ecdc50 | ||
|
|
1f8077fd21 | ||
|
|
59371eef5a | ||
|
|
1a0cb07319 | ||
|
|
a1e2c533f6 | ||
|
|
5b83ae4381 | ||
|
|
2aad20e2f7 | ||
|
|
ade01e98ad | ||
|
|
3e47fd6bcd | ||
|
|
a7d2d5f08b | ||
|
|
28a9800eb7 | ||
|
|
cc87cebee3 | ||
|
|
7963617f86 | ||
|
|
4551553eb4 | ||
|
|
c5c9dfb0c9 | ||
|
|
c6b8c6a3a6 | ||
|
|
f500f6e837 | ||
|
|
ace780ea74 | ||
|
|
07e69a88c2 | ||
|
|
2379d422cf | ||
|
|
7f689dadbe | ||
|
|
1cc5dea616 | ||
|
|
7a9dcbb572 | ||
|
|
fb6a92380d | ||
|
|
b749e816e9 | ||
|
|
0a6fd9ef5f | ||
|
|
3e4913dc8a | ||
|
|
37eef59d54 | ||
|
|
c7a22cc1ff | ||
|
|
bf778e2989 | ||
|
|
d1fb639b78 | ||
|
|
bd7b0a8197 | ||
|
|
3c4a7463b8 | ||
|
|
c79449a6a1 | ||
|
|
7940977dba | ||
|
|
3282360382 | ||
|
|
a9cd5b331f | ||
|
|
31b6ff4bae | ||
|
|
3854ae11b2 | ||
|
|
c62732804a | ||
|
|
1e29d12819 | ||
|
|
8ca67e49e4 | ||
|
|
2d22372de3 | ||
|
|
37a658d933 | ||
|
|
a36b73f745 | ||
|
|
134f108a30 | ||
|
|
f670d82796 | ||
|
|
2318bc8553 | ||
|
|
41f9dc0aa1 | ||
|
|
d9f71643ac | ||
|
|
a46731996d | ||
|
|
db930f7f93 | ||
|
|
18fd928a9d | ||
|
|
31e1558467 | ||
|
|
d7d6a0fa9d | ||
|
|
a15e83e590 | ||
|
|
49e3bc8b21 | ||
|
|
5f0cab6b63 | ||
|
|
973fd941d5 | ||
|
|
a518564006 | ||
|
|
8fd019b474 | ||
|
|
26ab2f633f | ||
|
|
12037b88ff | ||
|
|
49945c7198 | ||
|
|
60ae748635 | ||
|
|
95e3fb1663 | ||
|
|
9f53150a6a | ||
|
|
8286b0b72f | ||
|
|
0befc75f83 | ||
|
|
9cf46e6ae6 | ||
|
|
ce997cf6b0 | ||
|
|
f7ff0781b6 | ||
|
|
b946c1d39e | ||
|
|
e1e3bbbe3e | ||
|
|
ae33aa4869 | ||
|
|
4f962d2fcc | ||
|
|
85ae52b725 | ||
|
|
fd7ad586af | ||
|
|
fa53c2e683 | ||
|
|
9504d6c68f | ||
|
|
f2801491bf | ||
|
|
44f1d3dc1c | ||
|
|
d4a1168491 | ||
|
|
003bed573c | ||
|
|
b1c88cd1f2 | ||
|
|
ded49edf4c | ||
|
|
672093cf0e | ||
|
|
4eb8acc973 | ||
|
|
1b6c79fa7b | ||
|
|
226a7b7cfb | ||
|
|
c65d524725 | ||
|
|
d32e3cb867 | ||
|
|
9b52152e77 | ||
|
|
7c277234e7 | ||
|
|
4ed4e1005c | ||
|
|
675e6a8d1a | ||
|
|
06ad948abc | ||
|
|
50efa8f672 | ||
|
|
0c1f3ac16d | ||
|
|
f8edd3a37b | ||
|
|
065ba4ce5a | ||
|
|
8ab2cf09b7 | ||
|
|
60c2b53d47 | ||
|
|
7be19193d9 | ||
|
|
85e7125fee | ||
|
|
40e9c8807d | ||
|
|
3a317c81c6 | ||
|
|
b3698ebb0f | ||
|
|
3e65532eaa | ||
|
|
2131eed4e7 | ||
|
|
e80a19234e | ||
|
|
467d675262 | ||
|
|
23c59a6fc9 | ||
|
|
ec895372b7 | ||
|
|
6ec734c264 | ||
|
|
c20da5ea70 | ||
|
|
31e2bc1141 | ||
|
|
a1488a74a1 | ||
|
|
b6213cfbc5 | ||
|
|
4b0654afe5 | ||
|
|
5df7444cbb | ||
|
|
17ee1aed5f | ||
|
|
4a9138fc51 | ||
|
|
77a449a8f0 | ||
|
|
675cbc51cc | ||
|
|
655864eb7c | ||
|
|
3fd74a92f9 | ||
|
|
0f6101b19a | ||
|
|
bb2d44ae08 | ||
|
|
351e220d30 | ||
|
|
c98f1f0b6b | ||
|
|
9702461b09 | ||
|
|
ea2c329510 | ||
|
|
4a4fe82a1b | ||
|
|
dfad9695d2 | ||
|
|
46d33f45bf | ||
|
|
10a3516099 | ||
|
|
d741fb0af9 | ||
|
|
d4f58ddaf3 | ||
|
|
df0e28a61f | ||
|
|
3abdd7cdaf | ||
|
|
f53637891a | ||
|
|
5d3392428e | ||
|
|
6015c964a9 | ||
|
|
ca198e44d3 | ||
|
|
b5590ed197 | ||
|
|
3e9bb80217 | ||
|
|
6bbf273581 | ||
|
|
25c39a3311 | ||
|
|
c558a9a436 | ||
|
|
cfce429fca | ||
|
|
661c4a417b | ||
|
|
907574e637 | ||
|
|
a7a739eea9 | ||
|
|
51090e5fb9 | ||
|
|
8128b3894a | ||
|
|
946e328198 | ||
|
|
6a500ccdb3 | ||
|
|
50b96c757e | ||
|
|
a62c4135ad | ||
|
|
09d6f3f917 | ||
|
|
9bc6a3e0ee | ||
|
|
68bc82c11b | ||
|
|
9034c9d9e6 | ||
|
|
209c1fdc72 | ||
|
|
f34609fd9b | ||
|
|
b8d459d48b | ||
|
|
9b5ec8451f | ||
|
|
dfc35c1bf1 | ||
|
|
5620597fb4 | ||
|
|
390558b627 | ||
|
|
1b33bfd522 | ||
|
|
d9c3238462 | ||
|
|
29dcc5c8bc | ||
|
|
acce6867f1 | ||
|
|
13a7444f03 | ||
|
|
c294e62f5e | ||
|
|
1a6f04f6ce | ||
|
|
3a304ad054 | ||
|
|
04cb7732b8 | ||
|
|
4dde1ca070 | ||
|
|
b7cd28ef72 | ||
|
|
8d7299fe1f | ||
|
|
4bc1588a5e | ||
|
|
ce82bb816b | ||
|
|
0df29d1e0b | ||
|
|
04c92f8d19 | ||
|
|
b0e52f20f8 | ||
|
|
72efbe28ed | ||
|
|
52062f96d0 | ||
|
|
308c280e40 | ||
|
|
1e4022e05b | ||
|
|
03656b2320 | ||
|
|
a40bbacb17 | ||
|
|
cb20bf01f9 | ||
|
|
c7664b0a98 | ||
|
|
181d16459c | ||
|
|
ea23d2b91a | ||
|
|
2a28c6cc72 | ||
|
|
654f54d877 | ||
|
|
45ff14d678 | ||
|
|
16bc862904 | ||
|
|
c99dc5c431 | ||
|
|
a9738deae7 | ||
|
|
b4103c56a5 | ||
|
|
0552d75400 | ||
|
|
5ff40b05b3 | ||
|
|
f88ec0c141 | ||
|
|
13b656fe61 | ||
|
|
ca0fae33d1 | ||
|
|
2d46b50dd0 | ||
|
|
bb25c82110 | ||
|
|
d38257ea90 | ||
|
|
39988ad636 | ||
|
|
6a6009dbdf | ||
|
|
190dd456a7 | ||
|
|
978071f2ba | ||
|
|
c0df3dd965 | ||
|
|
456deefdbc | ||
|
|
25b610a36f | ||
|
|
7319293081 | ||
|
|
e8286600d1 | ||
|
|
7ce8c772ad | ||
|
|
0ac432dd25 | ||
|
|
ae4794ab4c | ||
|
|
99d198d0bf | ||
|
|
94da3b8467 | ||
|
|
d54ff0f9c2 | ||
|
|
1f0c18882c | ||
|
|
079e21ea43 | ||
|
|
3e19fbc2fb | ||
|
|
516ec6883d | ||
|
|
5bcf288333 | ||
|
|
546eeda1cd | ||
|
|
0b1eff8b0d | ||
|
|
789b95f259 | ||
|
|
8c8f15c420 | ||
|
|
0ceba49d78 | ||
|
|
706e031046 | ||
|
|
6f0dc2885f | ||
|
|
8050622b0f | ||
|
|
e0356ae01e | ||
|
|
99b7e8ad92 | ||
|
|
788ca2e5df | ||
|
|
fb71800557 |
@@ -1,25 +0,0 @@
|
||||
version: '{build}'
|
||||
platform:
|
||||
- x64
|
||||
image:
|
||||
- Visual Studio 2019
|
||||
- Ubuntu1804
|
||||
install:
|
||||
- cmd: cd c:\tools\vcpkg
|
||||
- cmd: git pull
|
||||
- cmd: bootstrap-vcpkg.bat
|
||||
- cmd: vcpkg install freetype glfw3 --triplet x64-windows-static
|
||||
- cmd: vcpkg integrate install
|
||||
- cmd: cd %APPVEYOR_BUILD_FOLDER%
|
||||
build_script:
|
||||
- cmd: msbuild .\update\build\win32\update.vcxproj
|
||||
- cmd: msbuild .\profiler\build\win32\Tracy.vcxproj
|
||||
- cmd: msbuild .\capture\build\win32\capture.vcxproj
|
||||
- sh: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk2.0-dev
|
||||
- sh: make -C update/build/unix debug release
|
||||
- sh: make -C profiler/build/unix debug release
|
||||
- sh: make -C capture/build/unix debug release
|
||||
- sh: make -C test
|
||||
- sh: make -C test clean
|
||||
- sh: make -C test TRACYFLAGS=-DTRACY_ON_DEMAND
|
||||
test: off
|
||||
1
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
github: wolfpld
|
||||
BIN
.github/sponsor.png
vendored
Normal file
|
After Width: | Height: | Size: 1.0 KiB |
44
.github/workflows/gcc.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: gcc
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-20.04, macOS-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install linux libraries
|
||||
if: ${{ matrix.os == 'ubuntu-20.04' }}
|
||||
run: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk-3-dev libcapstone-dev libtbb-dev
|
||||
- name: Install macos libraries
|
||||
if: ${{ matrix.os == 'macOS-latest' }}
|
||||
run: brew install capstone tbb pkg-config glfw
|
||||
- name: Profiler GUI
|
||||
run: make -j -C profiler/build/unix debug release
|
||||
- name: Update utility
|
||||
run: make -j -C update/build/unix debug release
|
||||
- name: Capture utility
|
||||
run: make -j -C capture/build/unix debug release
|
||||
- name: Csvexport utility
|
||||
run: make -j -C csvexport/build/unix debug release
|
||||
- name: Import-chrome utility
|
||||
run: make -j -C import-chrome/build/unix debug release
|
||||
- name: Library
|
||||
run: make -j -C library/unix debug release
|
||||
- name: Test application
|
||||
run: |
|
||||
make -j -C test
|
||||
make -j -C test clean
|
||||
make -j -C test TRACYFLAGS=-DTRACY_ON_DEMAND
|
||||
make -j -C test clean
|
||||
make -j -C test TRACYFLAGS="-DTRACY_DELAYED_INIT -DTRACY_MANUAL_LIFETIME"
|
||||
28
.github/workflows/latex.yml
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
name: Manual
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Fix stupidity
|
||||
run: |
|
||||
cp AUTHORS AUTHORS.
|
||||
cp LICENSE LICENSE.
|
||||
- name: Compile LaTeX
|
||||
uses: xu-cheng/latex-action@v2
|
||||
with:
|
||||
working_directory: manual
|
||||
root_file: tracy.tex
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: manual
|
||||
path: manual/tracy.pdf
|
||||
57
.github/workflows/msvc.yml
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
name: MSVC
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: windows-2019
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: microsoft/setup-msbuild@v1.0.0
|
||||
- name: Integrate vcpkg
|
||||
run: vcpkg integrate install
|
||||
- name: Build vcpkg libraries
|
||||
run: vcpkg install freetype glfw3 capstone[arm,arm64,x86] --triplet x64-windows-static
|
||||
- name: Profiler GUI Debug
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Profiler GUI Release
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Update utility Debug
|
||||
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Update utility Release
|
||||
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Capture utility Debug
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Capture utility Release
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Csvexport utility Debug
|
||||
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Csvexport utility Release
|
||||
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Import-chrome utility Debug
|
||||
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Import-chrome utility Release
|
||||
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Library
|
||||
run: msbuild .\library\win32\TracyProfiler.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Package binaries
|
||||
run: |
|
||||
mkdir bin
|
||||
mkdir bin\dev
|
||||
copy profiler\build\win32\x64\Release\Tracy.exe bin
|
||||
copy update\build\win32\x64\Release\update.exe bin
|
||||
copy capture\build\win32\x64\Release\capture.exe bin
|
||||
copy import-chrome\build\win32\x64\Release\import-chrome.exe bin
|
||||
copy csvexport\build\win32\x64\Release\csvexport.exe bin
|
||||
copy library\win32\x64\Release\TracyProfiler.dll bin\dev
|
||||
copy library\win32\x64\Release\TracyProfiler.lib bin\dev
|
||||
7z a Tracy.7z bin
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
path: Tracy.7z
|
||||
8
.gitignore
vendored
@@ -7,6 +7,7 @@ Release
|
||||
Debug
|
||||
*.d
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
imgui.ini
|
||||
test/tracy_test
|
||||
@@ -18,5 +19,12 @@ manual/t*.out
|
||||
manual/t*.pdf
|
||||
manual/t*.synctex.gz
|
||||
manual/t*.toc
|
||||
manual/t*.bbl
|
||||
manual/t*.blg
|
||||
profiler/build/win32/packages
|
||||
profiler/build/win32/Tracy.aps
|
||||
# include the vcpkg install script but not the files it produces
|
||||
vcpkg/*
|
||||
!vcpkg/install_vcpkg_dependencies.bat
|
||||
.deps/
|
||||
.dirstamp
|
||||
|
||||
17
.vscode/c_cpp_properties.json
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"/usr/include/freetype2"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/bin/clang++",
|
||||
"cStandard": "c11",
|
||||
"cppStandard": "c++17",
|
||||
"intelliSenseMode": "clang-x64"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
||||
27
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "(gdb) Launch",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/profiler/build/unix/Tracy-debug",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Włącz formatowanie kodu dla gdb",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
20
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
// See https://go.microsoft.com/fwlink/?LinkId=733558
|
||||
// for the documentation about the tasks.json format
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "Build Profiler",
|
||||
"type": "shell",
|
||||
"command": "make debug -C profiler/build/unix -j 24",
|
||||
"problemMatcher": {
|
||||
"base": "$gcc",
|
||||
"fileLocation": ["relative", "${workspaceRoot}/profiler/build/unix"]
|
||||
},
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
6
AUTHORS
@@ -6,3 +6,9 @@ Rokas Kupstys <rokups@zoho.com> (compatibility fixes, initia
|
||||
Till Rathmann <till.rathmann@gmx.de> (DLL support)
|
||||
Sherief Farouk <sherief.personal@gmail.com> (compatibility fixes)
|
||||
Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improvements)
|
||||
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
|
||||
Thales Sabino <thales@codeplay.com> (OpenCL support)
|
||||
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
|
||||
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
|
||||
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
|
||||
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)
|
||||
|
||||
4
LICENSE
@@ -1,7 +1,7 @@
|
||||
Tracy Profiler (https://bitbucket.org/wolfpld/tracy) is licensed under the
|
||||
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
|
||||
3-clause BSD license.
|
||||
|
||||
Copyright (c) 2017-2019, Bartosz Taudul <wolf.pld@gmail.com>
|
||||
Copyright (c) 2017-2020, Bartosz Taudul <wolf.pld@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
158
NEWS
@@ -6,9 +6,150 @@ Note: Release numbers are nothing more than numbers. There are some
|
||||
"missing" versions due to trace file changes during development. This is not
|
||||
a mistake.
|
||||
|
||||
v0.6 (xxxx-xx-xx)
|
||||
v0.7.1 (2020-08-24)
|
||||
-------------------
|
||||
|
||||
- Dropped support for pre-v0.6 traces.
|
||||
- Fixed regression on non-AVX2 CPUs.
|
||||
- Fixed incorrect calculation of some ghost zones.
|
||||
- Added list of cached source files.
|
||||
- Added import of plot data.
|
||||
- Secure versions of alloc/free macros.
|
||||
- Automated tracing of vertical synchronization on Windows.
|
||||
- Fixed attachment of postponed frame images.
|
||||
- Source location data can be now copied to clipboard from zone info window.
|
||||
- Zones in find zones menu can be now grouped by zone name.
|
||||
- Vulkan and D3D12 GPU contexts can be now calibrated.
|
||||
- Added CSV export utility.
|
||||
- "Go to frame" popup no longer has a dedicated button. To show it, click on
|
||||
the frame counter.
|
||||
- Added macro for checking if profiler is connected.
|
||||
- Implemented optional data removal from traces in the update utility.
|
||||
- Allow manual management of profiler lifetime.
|
||||
- Adjusted priority of ETW threads to time critical.
|
||||
- Annotations can be now freely adjusted on the timeline.
|
||||
- Limiting time range for find zone functionality has been significantly
|
||||
improved.
|
||||
- Added time range limits for statistics and symbol view.
|
||||
- Implemented call stack sampling on Linux (including Android).
|
||||
- Exact time from start of profiling session can be now viewed by hovering
|
||||
the mouse over the time scale.
|
||||
- Code transfer can be now compiled-out.
|
||||
- Added support for zone markup in unloadable modules.
|
||||
- Added image name filter to sampling statistics results window.
|
||||
|
||||
v0.7 (2020-06-11)
|
||||
-----------------
|
||||
|
||||
This is the last release which will be able to load pre-v0.6 traces. Use the
|
||||
update utility to convert your old traces now!
|
||||
|
||||
- chrome:tracing importer now imports zone metadata from "args" key.
|
||||
- Added display of statistical mode to find zone menu.
|
||||
- Automatic stack sampling is now available on windows.
|
||||
- Properly handle tracing on long-running systems.
|
||||
- Message list entries can now show associated frame image.
|
||||
- Call stack window will now display module names.
|
||||
- Symbol location in call stack window may now also display symbol address.
|
||||
- Statistics menu can now be used to display call stack sampling data or
|
||||
list available symbols.
|
||||
- All call paths leading to the sampled instruction in a call stack can be
|
||||
now displayed.
|
||||
- Frame image compression ratio (lossless in-memory compression, not taking
|
||||
into account DXT compression) is displayed in playback window.
|
||||
- Allow reconnection straight from the discard data dialog.
|
||||
- Added ability to set custom names for locks.
|
||||
- Improved handling of network ports.
|
||||
- Added time percentage display to instrumentation statistics.
|
||||
- Display of ghost zones (generated from automated call stack sampling).
|
||||
- Notify when empty labels display is enabled.
|
||||
- Small fragments of executable code will be now sent from client to server.
|
||||
- Added notification about query backlog.
|
||||
- Fixed performance problem with query backlog.
|
||||
- Display number of in-flight queries, in addition to query backlog.
|
||||
- Improved failure reports.
|
||||
- The capture utility will connect to localhost by default.
|
||||
- Added optional support for QPC timer on windows.
|
||||
- Complete rewrite of source file viewer. It is now 100% reliable when going
|
||||
to a source location.
|
||||
- Symbol source view was added.
|
||||
- Extension of source file viewer.
|
||||
- Can display source file, assembly view, or both at the same time.
|
||||
- May include display of statistical profiling data.
|
||||
- Ability to switch between source files which were used to build the
|
||||
symbol.
|
||||
- Ability to switch between inlined functions which are incorporated into
|
||||
the symbol.
|
||||
- Graphical representation of control flow in program.
|
||||
- Display of micro-architectural data for each assembly instruction.
|
||||
- Tracking register dependencies between assembly instructions.
|
||||
- Disassembly may be saved to a file, in order to be processed by external
|
||||
tools.
|
||||
- If the default listening port is occupied, profiler will now try listening
|
||||
on other ports.
|
||||
- Added possibility to perform source file names substitution.
|
||||
- Profiler windows can be now docked.
|
||||
- CPU usage tooltip now displays a list of running threads.
|
||||
- Added possibility to filter discovered clients list.
|
||||
- Source files are now cached during capture.
|
||||
- Profiler will now display a popup when application crashes.
|
||||
- Added ability to send simple integral values as extra payload for zones.
|
||||
- Per-frame zone times on the frames plot can now display self time.
|
||||
- Ability to bind only on localhost interface.
|
||||
- OpenCL profiling.
|
||||
- Direct3D 12 profiling.
|
||||
|
||||
v0.6.3 (2020-02-13)
|
||||
-------------------
|
||||
|
||||
- Fixed performance issues with loading saved traces on Ryzen CPUs.
|
||||
- Profiler window contents are now properly updated during window resize.
|
||||
- Improved tid to pid mapping on windows.
|
||||
- Zero length and unfinished zones are no longer taken into account for
|
||||
statistics.
|
||||
- Build files for shared library are now available (experimental).
|
||||
- GPU zones now also have "active" parameter.
|
||||
- Further reduction of memory usage and on-disk trace size.
|
||||
- Replaced ska::flat_hash_map with robin-hood-hashing.
|
||||
- Speed-up rendering of long lists of items.
|
||||
- Exact event time is displayed in some places in the UI.
|
||||
- Memory allocation lists can now be sorted.
|
||||
- Added display of trace file compression ratio.
|
||||
- Optional Zstd compression of trace files.
|
||||
- Frame images are now internally compressed using Zstd (instead of LZ4).
|
||||
- Fix display of continuous frame set tooltips.
|
||||
|
||||
v0.6.2 (2019-12-30)
|
||||
-------------------
|
||||
|
||||
- Improved call stack decoding on OSX.
|
||||
- Collection of CPU topology data.
|
||||
- C API now supports allocated source locations.
|
||||
- Added chrome:tracing importer.
|
||||
- Allow merging of ZoneText() strings.
|
||||
- Time distribution can now show both exclusive and inclusive times.
|
||||
- Display proper value of selection time in find zone menu.
|
||||
- Implemented limiting find zone search to a specified time range.
|
||||
- Highlight hovered zone from find zone menu zone list on the histogram.
|
||||
- Allow copying user data directory location to the clipboard.
|
||||
|
||||
v0.6.1 (2019-11-28)
|
||||
-------------------
|
||||
|
||||
- Dropped support for pre-v0.5 traces.
|
||||
- Improve BSD support.
|
||||
- GPU zone CPU thread highlight will now highlight whole thread, not only
|
||||
the thread name.
|
||||
- Added CPU thread highlight for CPU data items.
|
||||
- Client parameters may be now set from the server.
|
||||
- Minor UI fixes.
|
||||
|
||||
v0.6 (2019-11-17)
|
||||
-----------------
|
||||
|
||||
This is the last release which will be able to load pre-v0.5 traces. Use the
|
||||
update utility to convert your old traces now!
|
||||
|
||||
- Dropped support for pre-v0.4 traces.
|
||||
- Major memory usage decrease.
|
||||
- Significant network bandwidth decrease.
|
||||
@@ -29,8 +170,8 @@ v0.6 (xxxx-xx-xx)
|
||||
programs.
|
||||
- Thread migrations across CPU cores can be graphed.
|
||||
- System-wide workload distribution is now plotted on the timeline.
|
||||
- Added "CPU data" window (accessible from the trace info window), showing
|
||||
programs competing for CPU during the capture.
|
||||
- Added "CPU data" window showing programs competing for CPU during the
|
||||
capture.
|
||||
- Switched to using native thread identifiers (relatively small numbers), as
|
||||
opposed to pthreads identifiers, which in reality were pointers.
|
||||
- Improved thread name discovery if context switch capture is enabled.
|
||||
@@ -42,7 +183,8 @@ v0.6 (xxxx-xx-xx)
|
||||
- Per-frame zone times are now displayed on the frames plot when a zone is
|
||||
selected in the find zone menu.
|
||||
- Zone color is now displayed in zone information window.
|
||||
- Zone colors can now be determined basing on thread and depth.
|
||||
- Zone colors can now be determined basing on depth and thread or source
|
||||
location.
|
||||
- Thread colors are displayed across the profiler application.
|
||||
- Frame times can be now compared.
|
||||
- Expose more lock handling functionality.
|
||||
@@ -52,6 +194,14 @@ v0.6 (xxxx-xx-xx)
|
||||
- Added time distribution data in the zone information window.
|
||||
- Trace file name is now displayed in trace information window.
|
||||
- Annotations can be now added to the timeline.
|
||||
- Server now performs network data retrieval and decompression on a dedicated
|
||||
thread.
|
||||
- Added examples of Tracy integration.
|
||||
- Allow grouping of zones in the find zone menu by zone parent or with no
|
||||
grouping.
|
||||
- Zone list in the statistics window can be now filtered.
|
||||
- Implemented configuration of plots.
|
||||
- Messages can now collect call stacks.
|
||||
|
||||
v0.5 (2019-08-10)
|
||||
-----------------
|
||||
|
||||
73
README.md
@@ -1,73 +1,22 @@
|
||||
# Tracy Profiler
|
||||
|
||||
[](https://ci.appveyor.com/project/wolfpld/tracy/branch/master)
|
||||
[](https://github.com/sponsors/wolfpld/)
|
||||
|
||||
Tracy is a real time, nanosecond resolution frame profiler that can be used for remote or embedded telemetry of your application. It can profile CPU (C, C++11, Lua), GPU (OpenGL, Vulkan) and memory. It also can display locks held by threads and their interactions with each other.
|
||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||
|
||||
Tracy supports profiling CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, OpenCL, Direct3D 12), memory, locks, context switches, per-frame screenshots and more.
|
||||
|
||||
For usage **and build process** instructions, consult the user manual [at the following address](https://github.com/wolfpld/tracy/releases).
|
||||
|
||||

|
||||
|
||||
The following compilers are supported:
|
||||

|
||||
|
||||
- MSVC
|
||||
- gcc
|
||||
- clang
|
||||
|
||||
The following platforms are confirmed to be working (this is not a complete list):
|
||||
|
||||
- Windows (x86, x64)
|
||||
- Linux (x86, x64, ARM, ARM64)
|
||||
- Android (ARM, x86)
|
||||
- FreeBSD (x64)
|
||||
- Cygwin (x64)
|
||||
- WSL (x64)
|
||||
- OSX (x64)
|
||||
[Changelog](NEWS)
|
||||
|
||||
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
|
||||
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
|
||||
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
|
||||
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
|
||||
|
||||
[List of changes.](NEWS)
|
||||
|
||||
### High-level overview
|
||||
|
||||

|
||||
|
||||
Tracy is split into client and server side. The client side collects events using a high-efficiency queue and awaits for an incoming connection. The server part connects to client and receives collected data from the client, which is then reconstructed into a viewable timeline. The transfer is performed using a TCP connection.
|
||||
|
||||
### Performance impact
|
||||
|
||||
To check how much slowdown is introduced by using Tracy, I have profiled [etcpak](https://bitbucket.org/wolfpld/etcpak), which is the fastest ETC texture compression utility there is. I used an 8192×8192 test image as input data and instrumented everything down to the 4×4 pixel block compression function (that's 4 million blocks to compress). It should be noted that Tracy needs to calibrate its internal timers at each run. This introduces a delay of 115 ms (on my machine), which is negligible when doing lengthy profiling runs, but it skews the results of etcpak timing. The following times have this delay subtracted, to give focus on zone collection impact, which is the thing that really matters here.
|
||||
|
||||
| Scenario | Zones | Clean run | Profiling run | Difference |
|
||||
|-------------------------------------------------------|---------|-----------|---------------|------------|
|
||||
| Compression of an image to ETC1 format | 4194568 | 0.94 s | 1.003 s | +0.063 s |
|
||||
| Compression of an image to ETC2 format, with mip-maps | 5592822 | 1.034 s | 1.119 s | +0.085 s |
|
||||
|
||||
In both scenarios the per-zone time cost is at ~15 ns. This is in line with the measured 8 ns single event collection time (each zone has to report start and end event).
|
||||
|
||||
## Usage instructions
|
||||
|
||||
The user manual for Tracy is available [at the following address](https://bitbucket.org/wolfpld/tracy/downloads/tracy.pdf). It provides information about the integration process, required code markup and so on.
|
||||
|
||||
## Features
|
||||
|
||||
#### Histogram of function execution times
|
||||
|
||||

|
||||
|
||||
#### Comparison of two profiling runs
|
||||
|
||||

|
||||
|
||||
#### Marking locks
|
||||
|
||||

|
||||
|
||||
#### Plotting data
|
||||
|
||||

|
||||
|
||||
#### Message log
|
||||
|
||||

|
||||
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
|
||||
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
|
||||
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
|
||||
|
||||
7
TODO
Normal file
@@ -0,0 +1,7 @@
|
||||
"Would be nice to have" list for 1.0 release:
|
||||
=============================================
|
||||
|
||||
* Pack queue items tightly in the queues.
|
||||
* Use level-of-detail system for plots.
|
||||
* Use per-thread lock data structures.
|
||||
* Use DTrace for BSD/OSX context switch capture.
|
||||
129
Tracy.hpp
@@ -11,13 +11,20 @@
|
||||
#define ZoneNamedC(x,y,z)
|
||||
#define ZoneNamedNC(x,y,z,w)
|
||||
|
||||
#define ZoneTransient(x,y)
|
||||
#define ZoneTransientN(x,y,z)
|
||||
|
||||
#define ZoneScoped
|
||||
#define ZoneScopedN(x)
|
||||
#define ZoneScopedC(x)
|
||||
#define ZoneScopedNC(x,y)
|
||||
|
||||
#define ZoneText(x,y)
|
||||
#define ZoneTextV(x,y,z)
|
||||
#define ZoneName(x,y)
|
||||
#define ZoneNameV(x,y,z)
|
||||
#define ZoneValue(x)
|
||||
#define ZoneValueV(x,y)
|
||||
|
||||
#define FrameMark
|
||||
#define FrameMarkNamed(x)
|
||||
@@ -33,8 +40,10 @@
|
||||
#define LockableBase( type ) type
|
||||
#define SharedLockableBase( type ) type
|
||||
#define LockMark(x) (void)x;
|
||||
#define LockableName(x,y,z);
|
||||
|
||||
#define TracyPlot(x,y)
|
||||
#define TracyPlotConfig(x,y)
|
||||
|
||||
#define TracyMessage(x,y)
|
||||
#define TracyMessageL(x)
|
||||
@@ -44,12 +53,17 @@
|
||||
|
||||
#define TracyAlloc(x,y)
|
||||
#define TracyFree(x)
|
||||
#define TracySecureAlloc(x,y)
|
||||
#define TracySecureFree(x)
|
||||
|
||||
#define ZoneNamedS(x,y,z)
|
||||
#define ZoneNamedNS(x,y,z,w)
|
||||
#define ZoneNamedCS(x,y,z,w)
|
||||
#define ZoneNamedNCS(x,y,z,w,a)
|
||||
|
||||
#define ZoneTransientS(x,y,z)
|
||||
#define ZoneTransientNS(x,y,z,w)
|
||||
|
||||
#define ZoneScopedS(x)
|
||||
#define ZoneScopedNS(x,y)
|
||||
#define ZoneScopedCS(x,y)
|
||||
@@ -57,23 +71,42 @@
|
||||
|
||||
#define TracyAllocS(x,y,z)
|
||||
#define TracyFreeS(x,y)
|
||||
#define TracySecureAllocS(x,y,z)
|
||||
#define TracySecureFreeS(x,y)
|
||||
|
||||
#define TracyMessageS(x,y,z)
|
||||
#define TracyMessageLS(x,y)
|
||||
#define TracyMessageCS(x,y,z,w)
|
||||
#define TracyMessageLCS(x,y,z)
|
||||
|
||||
#define TracyParameterRegister(x)
|
||||
#define TracyParameterSetup(x,y,z,w)
|
||||
#define TracyIsConnected false
|
||||
|
||||
#else
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "client/TracyLock.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyScoped.hpp"
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active );
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active );
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
|
||||
#endif
|
||||
|
||||
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
|
||||
@@ -82,7 +115,11 @@
|
||||
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
|
||||
|
||||
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size );
|
||||
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size );
|
||||
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size );
|
||||
#define ZoneNameV( varname, txt, size ) varname.Name( txt, size );
|
||||
#define ZoneValue( value ) ___tracy_scoped_zone.Value( value );
|
||||
#define ZoneValueV( varname, value ) varname.Value( value );
|
||||
|
||||
#define FrameMark tracy::Profiler::SendFrameMark( nullptr );
|
||||
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name );
|
||||
@@ -91,49 +128,74 @@
|
||||
|
||||
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip );
|
||||
|
||||
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define LockableBase( type ) tracy::Lockable<type>
|
||||
#define SharedLockableBase( type ) tracy::SharedLockable<type>
|
||||
#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
|
||||
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
|
||||
#define LockableName( varname, txt, size ) varname.CustomName( txt, size );
|
||||
|
||||
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
|
||||
#define TracyPlotConfig( name, type ) tracy::Profiler::ConfigurePlot( name, type );
|
||||
|
||||
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size );
|
||||
#define TracyMessageL( txt ) tracy::Profiler::Message( txt );
|
||||
#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color );
|
||||
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color );
|
||||
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK );
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK );
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK );
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK );
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false );
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true );
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true );
|
||||
#else
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr );
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 );
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 );
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 );
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 );
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false );
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true );
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active );
|
||||
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
|
||||
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
|
||||
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color depth, true )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth );
|
||||
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth );
|
||||
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false );
|
||||
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false );
|
||||
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true );
|
||||
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true );
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth );
|
||||
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth );
|
||||
# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth );
|
||||
# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth );
|
||||
#else
|
||||
# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active )
|
||||
# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active )
|
||||
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
|
||||
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneScoped
|
||||
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
|
||||
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
|
||||
@@ -141,8 +203,19 @@
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
|
||||
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
|
||||
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
|
||||
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
|
||||
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
|
||||
# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color )
|
||||
# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color )
|
||||
#endif
|
||||
|
||||
#define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb );
|
||||
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val );
|
||||
#define TracyIsConnected tracy::GetProfiler().IsConnected()
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
101
TracyC.h
@@ -5,11 +5,17 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include "client/TracyCallstack.h"
|
||||
#include "common/TracyApi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name );
|
||||
|
||||
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
|
||||
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
typedef const void* TracyCZoneCtx;
|
||||
@@ -21,9 +27,12 @@ typedef const void* TracyCZoneCtx;
|
||||
#define TracyCZoneEnd(c)
|
||||
#define TracyCZoneText(c,x,y)
|
||||
#define TracyCZoneName(c,x,y)
|
||||
#define TracyCZoneValue(c,x)
|
||||
|
||||
#define TracyCAlloc(x,y)
|
||||
#define TracyCFree(x)
|
||||
#define TracyCSecureAlloc(x,y)
|
||||
#define TracyCSecureFree(x)
|
||||
|
||||
#define TracyCFrameMark
|
||||
#define TracyCFrameMarkNamed(x)
|
||||
@@ -38,6 +47,21 @@ typedef const void* TracyCZoneCtx;
|
||||
#define TracyCMessageLC(x,y)
|
||||
#define TracyCAppInfo(x,y)
|
||||
|
||||
#define TracyCZoneS(x,y,z)
|
||||
#define TracyCZoneNS(x,y,z,w)
|
||||
#define TracyCZoneCS(x,y,z,w)
|
||||
#define TracyCZoneNCS(x,y,z,w,a)
|
||||
|
||||
#define TracyCAllocS(x,y,z)
|
||||
#define TracyCFreeS(x,y)
|
||||
#define TracyCSecureAllocS(x,y,z)
|
||||
#define TracyCSecureFreeS(x,y)
|
||||
|
||||
#define TracyCMessageS(x,y,z)
|
||||
#define TracyCMessageLS(x,y)
|
||||
#define TracyCMessageCS(x,y,z,w)
|
||||
#define TracyCMessageLCS(x,y,z)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef TracyConcat
|
||||
@@ -66,11 +90,18 @@ struct ___tracy_c_zone_context
|
||||
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
||||
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
||||
|
||||
TRACY_API void ___tracy_init_thread(void);
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
|
||||
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active );
|
||||
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
|
||||
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
@@ -88,19 +119,39 @@ TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size
|
||||
|
||||
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
|
||||
#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size );
|
||||
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth );
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
|
||||
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK )
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_alloc_free_callstack( ptr, TRACY_CALLSTACK )
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
|
||||
#else
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size );
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr );
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 );
|
||||
#endif
|
||||
|
||||
|
||||
@@ -117,17 +168,9 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_plot( const char* name, double val );
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt );
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
|
||||
#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size );
|
||||
#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt );
|
||||
#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color );
|
||||
#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color );
|
||||
#define TracyCAppInfo( txt, color ) ___tracy_emit_message_appinfo( txt, color );
|
||||
|
||||
|
||||
@@ -137,8 +180,30 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth )
|
||||
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_alloc_free_callstack( ptr, depth )
|
||||
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
|
||||
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
|
||||
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
|
||||
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
|
||||
# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
|
||||
# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
|
||||
#else
|
||||
# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active )
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active )
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active )
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active )
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
|
||||
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
|
||||
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
|
||||
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )
|
||||
# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color )
|
||||
# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -15,6 +15,10 @@
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(push, 0)
|
||||
#endif
|
||||
|
||||
#include "common/tracy_lz4.cpp"
|
||||
#include "client/TracyProfiler.cpp"
|
||||
#include "client/TracyCallstack.cpp"
|
||||
@@ -24,20 +28,25 @@
|
||||
#include "client/tracy_rpmalloc.cpp"
|
||||
#include "client/TracyDxt1.cpp"
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
# include "libbacktrace/alloc.cpp"
|
||||
# include "libbacktrace/dwarf.cpp"
|
||||
# include "libbacktrace/elf.cpp"
|
||||
# include "libbacktrace/fileline.cpp"
|
||||
# include "libbacktrace/mmapio.cpp"
|
||||
# include "libbacktrace/posix.cpp"
|
||||
# include "libbacktrace/sort.cpp"
|
||||
# include "libbacktrace/state.cpp"
|
||||
# if TRACY_HAS_CALLSTACK == 4
|
||||
# include "libbacktrace/macho.cpp"
|
||||
# else
|
||||
# include "libbacktrace/elf.cpp"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma comment(lib, "ws2_32.lib")
|
||||
# pragma comment(lib, "dbghelp.lib")
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
//
|
||||
// Tracy profiler
|
||||
// ----------------
|
||||
//
|
||||
// On multi-DLL projects compile and
|
||||
// link with this source file (and none
|
||||
// other) in the executable and in
|
||||
// DLLs / shared objects that link to
|
||||
// the main DLL.
|
||||
//
|
||||
|
||||
// Define TRACY_ENABLE to enable profiler.
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# ifndef TRACY_IMPORTS
|
||||
# define TRACY_IMPORTS 1
|
||||
# endif
|
||||
#endif
|
||||
#include "common/TracySystem.cpp"
|
||||
387
TracyD3D12.hpp
Normal file
@@ -0,0 +1,387 @@
|
||||
#ifndef __TRACYD3D12_HPP__
|
||||
#define __TRACYD3D12_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
// ---------------------------------------------------------------------------
// Profiler disabled (TRACY_ENABLE not defined): every D3D12 macro collapses to
// nothing so instrumented code compiles unchanged and emits no profiling code.
// ---------------------------------------------------------------------------

// Context lifetime: creation yields a null context, destruction is a no-op.
#define TracyD3D12Context(device, queue) nullptr
#define TracyD3D12Destroy(ctx)

// Per-frame bookkeeping and result collection.
#define TracyD3D12NewFrame(ctx)
#define TracyD3D12Collect(ctx)

// Zones bound to an explicitly named variable.
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)

// Zones bound to an implicit variable.
#define TracyD3D12Zone(ctx, cmdList, name)
#define TracyD3D12ZoneC(ctx, cmdList, name, color)

namespace tracy
{
    // Empty placeholder so code that names tracy::D3D12ZoneScope still compiles.
    class D3D12ZoneScope
    {
    };
}

// Opaque context handle; TracyD3D12Context() always evaluates to nullptr here.
typedef void* TracyD3D12Ctx;
|
||||
|
||||
#else
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include <d3d12.h>
|
||||
#include <dxgi.h>
|
||||
#include <wrl/client.h>
|
||||
#include <queue>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Describes one batch of timestamp queries handed over by NewFrame(): the
// index of the first query slot and how many consecutive slots belong to the
// batch. Collect() walks the slots as (m_queryIdStart + j) % m_queryLimit, so
// a batch may wrap around the end of the query heap.
struct D3D12QueryPayload
{
    uint32_t m_queryIdStart{ 0 };   // First query slot of the batch.
    uint32_t m_queryCount{ 0 };     // Number of query slots in the batch.
};
|
||||
|
||||
// Command queue context.
|
||||
class D3D12QueueCtx
|
||||
{
|
||||
// D3D12ZoneScope is granted access to the private state declared below.
friend class D3D12ZoneScope;
|
||||
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even!
|
||||
|
||||
// Set to true as the last step of the constructor.
bool m_initialized = false;
|
||||
|
||||
// Device and queue passed to the constructor; stored as raw pointers.
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_queue = nullptr;
|
||||
// GPU context id taken from GetGpuCtxCounter(); written into every queue item this context emits.
uint8_t m_context;
|
||||
// Timestamp query heap created by the constructor with m_queryLimit entries.
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
||||
// Readback buffer of m_queryLimit * sizeof(uint64_t) bytes; Collect() maps it to read the timestamps.
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
||||
|
||||
// In-progress payload.
|
||||
// Number of query slots; starts at MaxQueries and is halved by the constructor each time heap creation fails.
uint32_t m_queryLimit = MaxQueries;
|
||||
// Queries counted for the current frame; reset to 0 by NewFrame() and by Collect() when on-demand and not connected.
// NOTE(review): the increment is not visible here - presumably done by D3D12ZoneScope; confirm.
uint32_t m_queryCounter = 0;
|
||||
// Start slot of the next payload; NewFrame() advances it by m_queryCounter and wraps it at m_queryLimit.
uint32_t m_previousQueryCounter = 0;
|
||||
|
||||
// Last value signalled on m_payloadFence; pre-incremented by NewFrame().
uint32_t m_activePayload = 0;
|
||||
// Fence signalled on the queue by NewFrame(); Collect() reads its completed value to find finished payloads.
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
||||
// Payloads pushed by NewFrame() and popped by Collect() once their fence value has completed.
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||
|
||||
// Device CPU timestamp of the previous calibration, already multiplied by m_qpcToNs.
int64_t m_prevCalibration = 0;
|
||||
// Multiplier applied to the CPU calibration timestamp.
// NOTE(review): integer division truncates, so the factor is inexact whenever GetFrequencyQpc() does not divide 1000000000 - confirm this is acceptable.
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
|
||||
|
||||
public:
|
||||
// Creates the profiling context for one D3D12 command queue.
//
// device - used to create the timestamp query heap, the readback buffer and
//          the payload fence.
// queue  - command queue to profile; its timestamp frequency and clock
//          calibration are sampled here.
//
// The constructor reserves a GPU context id, creates the GPU resources and
// queues a GpuNewContext item describing this context. Failures are reported
// with assert(), matching the rest of this class; in builds without asserts
// construction carries on with whatever could be created.
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
    : m_device(device)
    , m_queue(queue)
    , m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
{
    const bool isCopyQueue = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY;

    // Verify we support timestamp queries on this queue.
    if (isCopyQueue)
    {
        D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};

        // A successful query only means the structure was filled in; the
        // capability itself is reported in CopyQueueTimestampQueriesSupported.
        if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)))
            || !featureData.CopyQueueTimestampQueriesSupported)
        {
            assert(false && "Platform does not support profiling of copy queues.");
        }
    }

    // Zero-initialized so a failed call never leaves an indeterminate value
    // to be read below when asserts are compiled out.
    uint64_t timestampFrequency = 0;

    if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
    {
        assert(false && "Failed to get timestamp frequency.");
    }

    uint64_t cpuTimestamp = 0;
    uint64_t gpuTimestamp = 0;

    if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
    {
        assert(false && "Failed to get queue clock calibration.");
    }

    // Save the device cpu timestamp, not the profiler's timestamp.
    m_prevCalibration = cpuTimestamp * m_qpcToNs;

    cpuTimestamp = Profiler::GetTime();

    D3D12_QUERY_HEAP_DESC heapDesc{};
    heapDesc.Type = isCopyQueue ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
    heapDesc.Count = m_queryLimit;
    heapDesc.NodeMask = 0;  // #TODO: Support multiple adapters.

    // Retry with half the capacity until the heap can be created.
    while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
    {
        // Queries are begin and end markers, so fewer than two slots is
        // useless. Stop here instead of halving down to 0 and looping forever.
        if (m_queryLimit <= 2)
        {
            assert(false && "Failed to create query heap.");
            break;
        }

        m_queryLimit /= 2;
        heapDesc.Count = m_queryLimit;
    }

    // Create a readback buffer, which will be used as a destination for the query data.
    D3D12_RESOURCE_DESC readbackBufferDesc{};
    readbackBufferDesc.Alignment = 0;
    readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
    readbackBufferDesc.Height = 1;
    readbackBufferDesc.DepthOrArraySize = 1;
    readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
    readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;  // Buffers are always row major.
    readbackBufferDesc.MipLevels = 1;
    readbackBufferDesc.SampleDesc.Count = 1;
    readbackBufferDesc.SampleDesc.Quality = 0;
    readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

    D3D12_HEAP_PROPERTIES readbackHeapProps{};
    readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
    readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    readbackHeapProps.CreationNodeMask = 0;
    readbackHeapProps.VisibleNodeMask = 0;  // #TODO: Support multiple adapters.

    if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
    {
        assert(false && "Failed to create query readback buffer.");
    }

    if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
    {
        assert(false && "Failed to create payload fence.");
    }

    // Announce the new GPU context together with its initial CPU/GPU time pair.
    auto* item = Profiler::QueueSerial();
    MemWrite(&item->hdr.type, QueueType::GpuNewContext);
    MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
    MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
    memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
    MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
    MemWrite(&item->gpuNewContext.context, m_context);
    MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
    MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);

#ifdef TRACY_ON_DEMAND
    GetProfiler().DeferItem(*item);
#endif

    Profiler::QueueSerialFinish();

    m_initialized = true;
}
|
||||
|
||||
void NewFrame()
|
||||
{
|
||||
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, m_queryCounter });
|
||||
m_previousQueryCounter += m_queryCounter;
|
||||
m_queryCounter = 0;
|
||||
|
||||
if (m_previousQueryCounter >= m_queryLimit)
|
||||
{
|
||||
m_previousQueryCounter -= m_queryLimit;
|
||||
}
|
||||
|
||||
m_queue->Signal(m_payloadFence.Get(), ++m_activePayload);
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_queryCounter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find out what payloads are available.
|
||||
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||
|
||||
if (!payloadCount)
|
||||
{
|
||||
return; // No payloads are available yet, exit out.
|
||||
}
|
||||
|
||||
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||
|
||||
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||
void* readbackBufferMapping = nullptr;
|
||||
|
||||
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||
{
|
||||
assert(false && "Failed to map readback buffer.");
|
||||
}
|
||||
|
||||
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||
|
||||
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||
{
|
||||
const auto& payload = m_payloadQueue.front();
|
||||
|
||||
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||
{
|
||||
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||
const auto timestamp = timestampData[counter];
|
||||
const auto queryId = counter;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
m_payloadQueue.pop();
|
||||
}
|
||||
|
||||
m_readbackBuffer->Unmap(0, nullptr);
|
||||
|
||||
// Recalibrate to account for drift.
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
|
||||
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
assert(false && "Failed to get queue clock calibration.");
|
||||
}
|
||||
|
||||
cpuTimestamp *= m_qpcToNs;
|
||||
|
||||
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
|
||||
if (cpuDelta > 0)
|
||||
{
|
||||
m_prevCalibration = cpuTimestamp;
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
|
||||
MemWrite(&item->gpuCalibration.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Reserves two consecutive query slots (begin and end) for a zone and returns
// the index of the first one, wrapped to m_queryLimit.
tracy_force_inline uint32_t NextQueryId()
{
    assert(m_queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries.");

    // Offset of this zone's begin slot within the current frame.
    const auto frameOffset = m_queryCounter;

    // Claim the begin slot and the end slot that follows it.
    m_queryCounter = frameOffset + 2;

    return (m_previousQueryCounter + frameOffset) % m_queryLimit;
}
|
||||
|
||||
// Returns m_context, the id this context writes into the `context` field of
// the queue items it emits.
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
};
|
||||
|
||||
class D3D12ZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
D3D12QueueCtx* m_ctx = nullptr;
|
||||
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||
|
||||
public:
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
#if defined(TRACY_HAS_CALLSTACK) && defined(TRACY_CALLSTACK)
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
#else
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
#endif
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
#if defined(TRACY_HAS_CALLSTACK) && defined(TRACY_CALLSTACK)
|
||||
GetProfiler().SendCallstack(TRACY_CALLSTACK);
|
||||
#endif
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D12ZoneScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t));
|
||||
}
|
||||
};
|
||||
|
||||
// Allocates storage with tracy_malloc and constructs a D3D12QueueCtx in it
// for the given device and command queue. Release the result with
// DestroyD3D12Context.
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
{
    InitRPMallocThread();

    void* storage = tracy_malloc(sizeof(D3D12QueueCtx));
    return new (storage) D3D12QueueCtx{ device, queue };
}
|
||||
|
||||
// Tears down a context made by CreateD3D12Context: the object was
// placement-constructed into tracy_malloc storage, so the destructor is run
// explicitly and the storage is then returned with tracy_free.
// ctx is dereferenced unconditionally, so it must not be null.
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
|
||||
{
|
||||
ctx->~D3D12QueueCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
||||
|
||||
// Public D3D12 macros for builds with profiling enabled.
// Context lifetime: create / destroy a tracy::D3D12QueueCtx.
// Note that these expansions already end in ';'.
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
|
||||
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
|
||||
|
||||
// Forwards to D3D12QueueCtx::NewFrame().
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
|
||||
|
||||
// Zone macros: each declares a static constexpr SourceLocationData (named per
// __LINE__ via TracyConcat) and a tracy::D3D12ZoneScope variable bound to it.
// The C variants pass `color` as the last source-location field instead of 0.
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
|
||||
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
|
||||
// Unnamed variants: fixed variable name ___tracy_gpu_zone, always active.
#define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true)
|
||||
#define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true)
|
||||
|
||||
// Forwards to D3D12QueueCtx::Collect().
#define TracyD3D12Collect(ctx) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
234
TracyLua.hpp
@@ -125,6 +125,7 @@ static inline void LuaRemove( char* script )
|
||||
#else
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
|
||||
#include "common/TracyColor.hpp"
|
||||
#include "common/TracyAlign.hpp"
|
||||
@@ -150,9 +151,9 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
const char* func[64];
|
||||
uint32_t fsz[64];
|
||||
uint32_t ssz[64];
|
||||
uint32_t spaceNeeded = 4; // cnt
|
||||
|
||||
uint32_t cnt;
|
||||
uint8_t cnt;
|
||||
uint16_t spaceNeeded = sizeof( cnt );
|
||||
for( cnt=0; cnt<depth; cnt++ )
|
||||
{
|
||||
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
|
||||
@@ -162,31 +163,29 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
|
||||
spaceNeeded += fsz[cnt] + ssz[cnt];
|
||||
}
|
||||
spaceNeeded += cnt * ( 4 + 4 + 4 ); // source line, function string length, source string length
|
||||
spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
|
||||
|
||||
auto ptr = (char*)tracy_malloc( spaceNeeded + 4 );
|
||||
auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
|
||||
auto dst = ptr;
|
||||
memcpy( dst, &spaceNeeded, 4 ); dst += 4;
|
||||
memcpy( dst, &cnt, 4 ); dst += 4;
|
||||
for( uint32_t i=0; i<cnt; i++ )
|
||||
memcpy( dst, &spaceNeeded, 2 ); dst += 2;
|
||||
memcpy( dst, &cnt, 1 ); dst++;
|
||||
for( uint8_t i=0; i<cnt; i++ )
|
||||
{
|
||||
const uint32_t line = dbg[i].currentline;
|
||||
memcpy( dst, &line, 4 ); dst += 4;
|
||||
memcpy( dst, fsz+i, 4 ); dst += 4;
|
||||
assert( fsz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
memcpy( dst, fsz+i, 2 ); dst += 2;
|
||||
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
|
||||
memcpy( dst, ssz+i, 4 ); dst += 4;
|
||||
assert( ssz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
memcpy( dst, ssz+i, 2 ); dst += 2;
|
||||
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
|
||||
}
|
||||
assert( dst - ptr == spaceNeeded + 4 );
|
||||
assert( dst - ptr == spaceNeeded + 2 );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackAlloc );
|
||||
MemWrite( &item->callstackAlloc.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackAlloc.nativePtr, (uint64_t)Callstack( depth ) );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqPrepare( QueueType::CallstackAlloc );
|
||||
MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginS( lua_State* L )
|
||||
@@ -198,41 +197,14 @@ static inline int LuaZoneBeginS( lua_State* L )
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
const uint32_t color = Color::DeepSkyBlue3;
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
|
||||
const uint32_t line = dbg.currentline;
|
||||
const auto func = dbg.name ? dbg.name : dbg.short_src;
|
||||
const auto fsz = strlen( func );
|
||||
const auto ssz = strlen( dbg.source );
|
||||
|
||||
// Data layout:
|
||||
// 4b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
// 1b null terminator
|
||||
// ssz source file name
|
||||
// 1b null terminator
|
||||
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 4 );
|
||||
memcpy( ptr + 4, &color, 4 );
|
||||
memcpy( ptr + 8, &line, 4 );
|
||||
memcpy( ptr + 12, func, fsz+1 );
|
||||
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
@@ -253,45 +225,16 @@ static inline int LuaZoneBeginNS( lua_State* L )
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
const uint32_t color = Color::DeepSkyBlue3;
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
|
||||
const uint32_t line = dbg.currentline;
|
||||
const auto func = dbg.name ? dbg.name : dbg.short_src;
|
||||
size_t nsz;
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto fsz = strlen( func );
|
||||
const auto ssz = strlen( dbg.source );
|
||||
|
||||
// Data layout:
|
||||
// 4b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
// 1b null terminator
|
||||
// ssz source file name
|
||||
// 1b null terminator
|
||||
// nsz zone name
|
||||
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nsz );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 4 );
|
||||
memcpy( ptr + 4, &color, 4 );
|
||||
memcpy( ptr + 8, &line, 4 );
|
||||
memcpy( ptr + 12, func, fsz+1 );
|
||||
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
|
||||
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nsz );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
@@ -316,41 +259,14 @@ static inline int LuaZoneBegin( lua_State* L )
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
const uint32_t color = Color::DeepSkyBlue3;
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
|
||||
const uint32_t line = dbg.currentline;
|
||||
const auto func = dbg.name ? dbg.name : dbg.short_src;
|
||||
const auto fsz = strlen( func );
|
||||
const auto ssz = strlen( dbg.source );
|
||||
|
||||
// Data layout:
|
||||
// 4b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
// 1b null terminator
|
||||
// ssz source file name
|
||||
// 1b null terminator
|
||||
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 4 );
|
||||
memcpy( ptr + 4, &color, 4 );
|
||||
memcpy( ptr + 8, &line, 4 );
|
||||
memcpy( ptr + 12, func, fsz+1 );
|
||||
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@@ -367,45 +283,16 @@ static inline int LuaZoneBeginN( lua_State* L )
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
const uint32_t color = Color::DeepSkyBlue3;
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
|
||||
const uint32_t line = dbg.currentline;
|
||||
const auto func = dbg.name ? dbg.name : dbg.short_src;
|
||||
size_t nsz;
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto fsz = strlen( func );
|
||||
const auto ssz = strlen( dbg.source );
|
||||
|
||||
// Data layout:
|
||||
// 4b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
// 1b null terminator
|
||||
// ssz source file name
|
||||
// 1b null terminator
|
||||
// nsz zone name
|
||||
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nsz );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 4 );
|
||||
memcpy( ptr + 4, &color, 4 );
|
||||
memcpy( ptr + 8, &line, 4 );
|
||||
memcpy( ptr + 12, func, fsz+1 );
|
||||
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
|
||||
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nsz );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@@ -423,13 +310,9 @@ static inline int LuaZoneEnd( lua_State* L )
|
||||
}
|
||||
#endif
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
||||
TracyLfqPrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -446,17 +329,14 @@ static inline int LuaZoneText( lua_State* L )
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneText );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqPrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -473,17 +353,14 @@ static inline int LuaZoneName( lua_State* L )
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneName );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqPrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -495,18 +372,15 @@ static inline int LuaMessage( lua_State* L )
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
TracyLfqPrepare( QueueType::Message );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::Message );
|
||||
MemWrite( &item->message.time, Profiler::GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
MemWrite( &item->messageFat.time, Profiler::GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
338
TracyOpenCL.hpp
Normal file
@@ -0,0 +1,338 @@
|
||||
#ifndef __TRACYOPENCL_HPP__
|
||||
#define __TRACYOPENCL_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
// Stubs used when TRACY_ENABLE is not defined: context creation yields
// nullptr and every other macro expands to nothing.
#define TracyCLContext(c, x) nullptr
|
||||
#define TracyCLDestroy(c)
|
||||
|
||||
// Zone macros without callstack depth.
#define TracyCLNamedZone(c, x, y, z)
|
||||
#define TracyCLNamedZoneC(c, x, y, z, w)
|
||||
#define TracyCLZone(c, x)
|
||||
#define TracyCLZoneC(c, x, y)
|
||||
|
||||
// Zone macros taking one more argument than their counterparts above.
#define TracyCLNamedZoneS(c, x, y, z, w)
|
||||
#define TracyCLNamedZoneCS(c, x, y, z, w, v)
|
||||
#define TracyCLZoneS(c, x, y)
|
||||
#define TracyCLZoneCS(c, x, y, z)
|
||||
|
||||
// Event attachment.
#define TracyCLNamedZoneSetEvent(x, e)
|
||||
#define TracyCLZoneSetEvent(e)
|
||||
|
||||
// Collection.
#define TracyCLCollect(c)
|
||||
|
||||
// Empty placeholder so the name tracy::OpenCLCtxScope still exists when
// TRACY_ENABLE is not defined.
namespace tracy
|
||||
{
|
||||
class OpenCLCtxScope {};
|
||||
}
|
||||
|
||||
using TracyCLCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
namespace tracy {
|
||||
|
||||
// Which end of a zone a queued query stands for. OpenCLCtx::Collect reads
// CL_PROFILING_COMMAND_START for Begin and CL_PROFILING_COMMAND_END for End.
enum class EventPhase : uint8_t
|
||||
{
|
||||
Begin,
|
||||
End
|
||||
};
|
||||
|
||||
// One slot of the OpenCLCtx query ring.
struct EventInfo
|
||||
{
|
||||
// OpenCL event to read the timestamp from. Begin slots are created with
// nullptr and filled in later by OpenCLCtxScope::SetEvent.
cl_event event;
|
||||
// Whether this slot marks the start or the end of the zone.
EventPhase phase;
|
||||
};
|
||||
|
||||
class OpenCLCtx
|
||||
{
|
||||
public:
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
OpenCLCtx(cl_context context, cl_device_id device)
|
||||
: m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||
, m_head(0)
|
||||
, m_tail(0)
|
||||
{
|
||||
assert(m_contextId != 255);
|
||||
|
||||
m_hostStartTime = Profiler::GetTime();
|
||||
m_deviceStartTime = GetDeviceTimestamp(context, device);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, m_hostStartTime);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, m_hostStartTime);
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1.0f);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
||||
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
||||
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
if (m_tail == m_head) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (m_tail != m_head)
|
||||
{
|
||||
EventInfo eventInfo = m_query[m_tail];
|
||||
cl_event event = eventInfo.event;
|
||||
cl_int eventStatus;
|
||||
cl_int err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
if (eventStatus != CL_COMPLETE) return;
|
||||
|
||||
cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin)
|
||||
? CL_PROFILING_COMMAND_START
|
||||
: CL_PROFILING_COMMAND_END;
|
||||
|
||||
cl_ulong eventTimeStamp = 0;
|
||||
err = clGetEventProfilingInfo(event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
assert(eventTimeStamp != 0);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, TimestampOffset(eventTimeStamp));
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
if (eventInfo.phase == EventPhase::End)
|
||||
{
|
||||
// Done with the event, so release it
|
||||
assert(clReleaseEvent(event) == CL_SUCCESS);
|
||||
}
|
||||
|
||||
m_tail = (m_tail + 1) % QueryCount;
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo)
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = (m_head + 1) % QueryCount;
|
||||
assert(m_head != m_tail);
|
||||
m_query[id] = eventInfo;
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline EventInfo& GetQuery(unsigned int id)
|
||||
{
|
||||
assert(id < QueryCount);
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline int64_t GetHostStartTime() const
|
||||
{
|
||||
return m_hostStartTime;
|
||||
}
|
||||
|
||||
tracy_force_inline int64_t GetDeviceStartTime() const
|
||||
{
|
||||
return m_deviceStartTime;
|
||||
}
|
||||
|
||||
tracy_force_inline int64_t TimestampOffset(int64_t deviceTimestamp) const
|
||||
{
|
||||
return m_hostStartTime + (deviceTimestamp - m_deviceStartTime);
|
||||
}
|
||||
|
||||
tracy_force_inline int64_t GetDeviceTimestamp(cl_context context, cl_device_id device) const
|
||||
{
|
||||
cl_ulong deviceTimestamp = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
assert(err == CL_SUCCESS);
|
||||
uint32_t dummyValue = 42;
|
||||
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
|
||||
assert(err == CL_SUCCESS);
|
||||
cl_event writeBufferEvent;
|
||||
err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_TRUE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clWaitForEvents(1, &writeBufferEvent);
|
||||
assert(err == CL_SUCCESS);
|
||||
cl_int eventStatus;
|
||||
err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
assert(eventStatus == CL_COMPLETE);
|
||||
err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &deviceTimestamp, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clReleaseEvent(writeBufferEvent);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clReleaseMemObject(dummyBuffer);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clReleaseCommandQueue(queue);
|
||||
assert(err == CL_SUCCESS);
|
||||
|
||||
return (int64_t)deviceTimestamp;
|
||||
}
|
||||
|
||||
unsigned int m_contextId;
|
||||
|
||||
EventInfo m_query[QueryCount];
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
|
||||
int64_t m_hostStartTime;
|
||||
int64_t m_deviceStartTime;
|
||||
};
|
||||
|
||||
class OpenCLCtxScope {
|
||||
public:
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
GetProfiler().SendCallstack(depth);
|
||||
}
|
||||
|
||||
tracy_force_inline void SetEvent(cl_event event)
|
||||
{
|
||||
m_event = event;
|
||||
assert(clRetainEvent(m_event) == CL_SUCCESS);
|
||||
m_ctx->GetQuery(m_beginQueryId).event = m_event;
|
||||
}
|
||||
|
||||
tracy_force_inline ~OpenCLCtxScope()
|
||||
{
|
||||
const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId);
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
const bool m_active;
|
||||
OpenCLCtx* m_ctx;
|
||||
cl_event m_event;
|
||||
unsigned int m_beginQueryId;
|
||||
};
|
||||
|
||||
// Allocates storage with tracy_malloc and constructs an OpenCLCtx in it for
// the given OpenCL context and device. Release the result with
// DestroyCLContext.
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
{
    InitRPMallocThread();

    void* storage = tracy_malloc(sizeof(OpenCLCtx));
    return new (storage) OpenCLCtx(context, device);
}
|
||||
|
||||
// Tears down a context made by CreateCLContext: the object was
// placement-constructed into tracy_malloc storage, so the destructor is run
// explicitly and the storage is then returned with tracy_free.
// ctx is dereferenced unconditionally, so it must not be null.
static inline void DestroyCLContext(OpenCLCtx* ctx)
|
||||
{
|
||||
ctx->~OpenCLCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
} // namespace tracy
|
||||
|
||||
using TracyCLCtx = tracy::OpenCLCtx*;
|
||||
|
||||
// Public OpenCL macros for builds with TRACY_ENABLE defined.
// Context lifetime: create / destroy a tracy::OpenCLCtx.
// Note that these two expansions already end in ';'.
#define TracyCLContext(context, device) tracy::CreateCLContext(context, device);
|
||||
#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx);
|
||||
// When callstack capture is available and TRACY_CALLSTACK is set, the plain
// zone macros pass TRACY_CALLSTACK as the depth, selecting the OpenCLCtxScope
// constructor that also sends a callstack.
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true)
|
||||
// Otherwise the plain zone macros use the constructor without a depth.
#else
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true )
|
||||
#endif
|
||||
|
||||
// OpenCL zone macros taking an explicit `depth` argument.
// With TRACY_HAS_CALLSTACK defined, `depth` is passed to the
// tracy::OpenCLCtxScope constructor between the source location and `active`.
// TracyCLZoneS / TracyCLZoneCS use the fixed variable name __tracy_gpu_zone
// and active == true.
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true)
|
||||
// Without TRACY_HAS_CALLSTACK the `depth` argument is dropped and each macro
// forwards to its counterpart that has no depth parameter.
#else
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active)
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active)
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color)
|
||||
#endif
|
||||
|
||||
// Calls SetEvent(event) on the zone object named `varname` (the variable a
// TracyCLNamedZone* macro declares). No trailing ';' in the expansion.
#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event)
|
||||
// Calls SetEvent(event) on __tracy_gpu_zone, the variable declared by the
// TracyCLZone* macros, so one of those must be in scope at the point of use.
#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event)
|
||||
|
||||
// Calls Collect() on the context pointer `ctx`. The argument is parenthesised
// so that an expression argument (e.g. a conditional) is evaluated as a whole
// before `->` is applied. No trailing ';' in the expansion.
#define TracyCLCollect(ctx) (ctx)->Collect()
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
113
TracyOpenGL.hpp
@@ -1,19 +1,21 @@
|
||||
#ifndef __TRACYOPENGL_HPP__
|
||||
#define __TRACYOPENGL_HPP__
|
||||
|
||||
// Include this file after you include OpenGL 3.2 headers.
|
||||
#if !defined GL_TIMESTAMP && !defined GL_TIMESTAMP_EXT
|
||||
# error "You must include OpenGL 3.2 headers before including TracyOpenGL.hpp"
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_ENABLE || defined __APPLE__
|
||||
|
||||
#define TracyGpuContext
|
||||
#define TracyGpuNamedZone(x,y)
|
||||
#define TracyGpuNamedZoneC(x,y,z)
|
||||
#define TracyGpuNamedZone(x,y,z)
|
||||
#define TracyGpuNamedZoneC(x,y,z,w)
|
||||
#define TracyGpuZone(x)
|
||||
#define TracyGpuZoneC(x,y)
|
||||
#define TracyGpuCollect
|
||||
|
||||
#define TracyGpuNamedZoneS(x,y,z)
|
||||
#define TracyGpuNamedZoneCS(x,y,z,w)
|
||||
#define TracyGpuNamedZoneS(x,y,z,w)
|
||||
#define TracyGpuNamedZoneCS(x,y,z,w,a)
|
||||
#define TracyGpuZoneS(x,y)
|
||||
#define TracyGpuZoneCS(x,y,z)
|
||||
|
||||
@@ -23,8 +25,8 @@ struct SourceLocationData;
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
GpuCtxScope( const SourceLocationData* ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int depth ) {}
|
||||
GpuCtxScope( const SourceLocationData*, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int, bool ) {}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -48,28 +50,28 @@ public:
|
||||
# define glQueryCounter glQueryCounterEXT
|
||||
#endif
|
||||
|
||||
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
|
||||
# define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK )
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
#else
|
||||
# define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
|
||||
# define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color )
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
|
||||
#endif
|
||||
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyGpuNamedZoneS( varname, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth )
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
|
||||
#else
|
||||
# define TracyGpuNamedZoneS( varname, name, depth ) TracyGpuNamedZone( varname, name )
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth ) TracyGpuNamedZoneC( varname, name, color )
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
|
||||
#endif
|
||||
@@ -101,24 +103,21 @@ public:
|
||||
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
||||
|
||||
const float period = 1.f;
|
||||
Magic magic;
|
||||
const auto thread = GetThreadHandle();
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
TracyLfqPrepare( QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuNewContext.thread, thread );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Collect()
|
||||
@@ -135,10 +134,6 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
|
||||
while( m_tail != m_head )
|
||||
{
|
||||
GLint available;
|
||||
@@ -148,12 +143,11 @@ public:
|
||||
uint64_t time;
|
||||
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
|
||||
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuTime );
|
||||
TracyLfqPrepare( QueueType::GpuTime );
|
||||
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
|
||||
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
|
||||
MemWrite( &item->gpuTime.context, m_context );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
|
||||
m_tail = ( m_tail + 1 ) % QueryCount;
|
||||
}
|
||||
@@ -188,81 +182,68 @@ private:
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc )
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( GetProfiler().IsConnected() )
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
#endif
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
|
||||
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth )
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( GetProfiler().IsConnected() )
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
#endif
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
const auto thread = GetThreadHandle();
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, thread );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
|
||||
GetProfiler().SendCallstack( depth );
|
||||
}
|
||||
|
||||
tracy_force_inline ~GpuCtxScope()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
#endif
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
|
||||
TracyLfqPrepare( QueueType::GpuZoneEnd );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const bool m_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
222
TracyVulkan.hpp
@@ -4,15 +4,16 @@
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyVkContext(x,y,z,w) nullptr
|
||||
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
|
||||
#define TracyVkDestroy(x)
|
||||
#define TracyVkNamedZone(c,x,y,z)
|
||||
#define TracyVkNamedZoneC(c,x,y,z,w)
|
||||
#define TracyVkNamedZone(c,x,y,z,w)
|
||||
#define TracyVkNamedZoneC(c,x,y,z,w,a)
|
||||
#define TracyVkZone(c,x,y)
|
||||
#define TracyVkZoneC(c,x,y,z)
|
||||
#define TracyVkCollect(c,x)
|
||||
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v)
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w,a)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
|
||||
#define TracyVkZoneS(c,x,y,z)
|
||||
#define TracyVkZoneCS(c,x,y,z,w)
|
||||
|
||||
@@ -42,16 +43,36 @@ class VkCtx
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
uint32_t num;
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
|
||||
if( num > 4 ) num = 4;
|
||||
VkTimeDomainEXT data[4];
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
|
||||
for( uint32_t i=0; i<num; i++ )
|
||||
{
|
||||
// TODO VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT
|
||||
if( data[i] == VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT )
|
||||
{
|
||||
m_timeDomain = data[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties prop;
|
||||
vkGetPhysicalDeviceProperties( physdev, &prop );
|
||||
const float period = prop.limits.timestampPeriod;
|
||||
@@ -81,21 +102,56 @@ public:
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
int64_t tcpu, tgpu;
|
||||
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
int64_t tgpu;
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
tcpu = Profiler::GetTime();
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
}
|
||||
else
|
||||
{
|
||||
enum { NumProbes = 32 };
|
||||
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation[NumProbes];
|
||||
for( int i=0; i<NumProbes; i++ )
|
||||
{
|
||||
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
|
||||
}
|
||||
uint64_t minDeviation = deviation[0];
|
||||
for( int i=1; i<NumProbes; i++ )
|
||||
{
|
||||
if( minDeviation > deviation[i] )
|
||||
{
|
||||
minDeviation = deviation[i];
|
||||
}
|
||||
}
|
||||
m_deviation = minDeviation * 3 / 2;
|
||||
|
||||
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
tcpu = Profiler::GetTime();
|
||||
}
|
||||
|
||||
uint8_t flags = 0;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
@@ -104,7 +160,9 @@ public:
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
@@ -130,6 +188,8 @@ public:
|
||||
{
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
m_head = m_tail = 0;
|
||||
int64_t tgpu;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@@ -161,6 +221,25 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
int64_t tgpu, tcpu;
|
||||
Calibrate( m_device, tcpu, tgpu );
|
||||
const auto refCpu = Profiler::GetTime();
|
||||
const auto delta = tcpu - m_prevCalibration;
|
||||
if( delta > 0 )
|
||||
{
|
||||
m_prevCalibration = tcpu;
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
|
||||
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||
MemWrite( &item->gpuCalibration.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
||||
|
||||
m_tail += cnt;
|
||||
@@ -181,8 +260,35 @@ private:
|
||||
return m_context;
|
||||
}
|
||||
|
||||
// Queries the device clock and the m_timeDomain clock together through
// m_vkGetCalibratedTimestampsEXT, repeating the query until the reported
// deviation is no greater than m_deviation.
//   device - forwarded to m_vkGetCalibratedTimestampsEXT
//   tCpu   - out: the m_timeDomain timestamp multiplied by m_qpcToNs
//   tGpu   - out: the VK_TIME_DOMAIN_DEVICE_EXT timestamp
// The outputs are written only when _WIN32 or __CYGWIN__ is defined; on other
// targets the function hits assert( false ) and leaves both untouched.
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
    assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );

    // Slot 0 requests the device domain, slot 1 the calibrated host domain.
    VkCalibratedTimestampInfoEXT domains[2] = {
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
    };
    uint64_t stamps[2];
    uint64_t maxDeviation;
    for(;;)
    {
        m_vkGetCalibratedTimestampsEXT( device, 2, domains, stamps, &maxDeviation );
        // Accept the first sample within the tolerance held in m_deviation.
        if( maxDeviation <= m_deviation ) break;
    }

#if defined _WIN32 || defined __CYGWIN__
    tGpu = stamps[0];
    tCpu = stamps[1] * m_qpcToNs;
#else
    assert( false );
#endif
}
|
||||
|
||||
VkDevice m_device;
|
||||
VkQueryPool m_query;
|
||||
VkTimeDomainEXT m_timeDomain;
|
||||
uint64_t m_deviation;
|
||||
int64_t m_qpcToNs;
|
||||
int64_t m_prevCalibration;
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
@@ -191,23 +297,26 @@ private:
|
||||
unsigned int m_queryCount;
|
||||
|
||||
int64_t* m_res;
|
||||
|
||||
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
|
||||
};
|
||||
|
||||
class VkCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf )
|
||||
: m_cmdbuf( cmdbuf )
|
||||
, m_ctx( ctx )
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
, m_active( GetProfiler().IsConnected() )
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
#endif
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
@@ -219,19 +328,19 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth )
|
||||
: m_cmdbuf( cmdbuf )
|
||||
, m_ctx( ctx )
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
, m_active( GetProfiler().IsConnected() )
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
#endif
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
@@ -247,11 +356,10 @@ public:
|
||||
|
||||
tracy_force_inline ~VkCtxScope()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
#endif
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId );
|
||||
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
@@ -263,18 +371,17 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
VkCommandBuffer m_cmdbuf;
|
||||
VkCtx* m_ctx;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const bool m_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf );
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
return ctx;
|
||||
}
|
||||
|
||||
@@ -288,29 +395,30 @@ static inline void DestroyVkContext( VkCtx* ctx )
|
||||
|
||||
using TracyVkCtx = tracy::VkCtx*;
|
||||
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf );
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
|
||||
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK )
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
|
||||
#else
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color )
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
|
||||
#endif
|
||||
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth )
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
|
||||
#else
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth ) TracyVkNamedZone( ctx, varname, cmdbuf, name )
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color )
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
|
||||
#endif
|
||||
|
||||
@@ -1,60 +1,12 @@
|
||||
CFLAGS +=
|
||||
CXXFLAGS := $(CFLAGS) -std=gnu++17
|
||||
DEFINES += -DTRACY_NO_STATISTICS
|
||||
INCLUDES :=
|
||||
LIBS := -lpthread
|
||||
INCLUDES := $(shell pkg-config --cflags capstone)
|
||||
LIBS += $(shell pkg-config --libs capstone) -lpthread
|
||||
PROJECT := capture
|
||||
IMAGE := $(PROJECT)-$(BUILD)
|
||||
|
||||
FILTER :=
|
||||
include ../../../common/src-from-vcxproj.mk
|
||||
|
||||
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
|
||||
SRC := $(filter-out $(FILTER),$(BASE))
|
||||
SRC2 := $(filter-out $(FILTER),$(BASE2))
|
||||
|
||||
TBB := $(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?)
|
||||
ifeq ($(TBB),0)
|
||||
LIBS += -ltbb
|
||||
endif
|
||||
|
||||
OBJDIRBASE := obj/$(BUILD)
|
||||
OBJDIR := $(OBJDIRBASE)/o/o/o
|
||||
|
||||
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
|
||||
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
|
||||
|
||||
all: $(IMAGE)
|
||||
|
||||
$(OBJDIR)/%.o: %.cpp
|
||||
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.cpp
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.c
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(IMAGE): $(OBJ) $(OBJ2)
|
||||
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@
|
||||
|
||||
ifneq "$(MAKECMDGOALS)" "clean"
|
||||
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) %(addprefix $(OBJDIR)/,$(SRC2:.c=.d))
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJDIRBASE) $(IMAGE)*
|
||||
|
||||
.PHONY: clean all
|
||||
include ../../../common/unix.mk
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
CFLAGS := -O3 -s -fomit-frame-pointer
|
||||
CFLAGS := -O3 -s -march=native
|
||||
DEFINES := -DNDEBUG
|
||||
BUILD := release
|
||||
|
||||
ifeq ($(ARCH),x86_64)
|
||||
CFLAGS += -msse4.1
|
||||
endif
|
||||
|
||||
include build.mk
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
|
||||
<RootNamespace>capture</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
@@ -88,10 +89,12 @@
|
||||
<PreprocessorDefinitions>TRACY_NO_STATISTICS;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\debug\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
@@ -120,12 +123,14 @@
|
||||
<PreprocessorDefinitions>TRACY_NO_STATISTICS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
@@ -133,13 +138,40 @@
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c" />
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
|
||||
<ClCompile Include="..\..\..\zstd\debug.c" />
|
||||
<ClCompile Include="..\..\..\zstd\entropy_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\error_private.c" />
|
||||
<ClCompile Include="..\..\..\zstd\fse_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\fse_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\hist.c" />
|
||||
<ClCompile Include="..\..\..\zstd\huf_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\pool.c" />
|
||||
<ClCompile Include="..\..\..\zstd\threading.c" />
|
||||
<ClCompile Include="..\..\..\zstd\xxhash.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstdmt_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_lazy.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ldm.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_opt.c" />
|
||||
<ClCompile Include="..\..\src\capture.cpp" />
|
||||
<ClCompile Include="..\..\src\getopt.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
|
||||
@@ -150,23 +182,52 @@
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_benaphore.h" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_sema.h" />
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h" />
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
|
||||
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp" />
|
||||
<ClInclude Include="..\..\src\getopt.h" />
|
||||
<ClInclude Include="..\..\..\zstd\bitstream.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compiler.h" />
|
||||
<ClInclude Include="..\..\..\zstd\cpu.h" />
|
||||
<ClInclude Include="..\..\..\zstd\debug.h" />
|
||||
<ClInclude Include="..\..\..\zstd\error_private.h" />
|
||||
<ClInclude Include="..\..\..\zstd\fse.h" />
|
||||
<ClInclude Include="..\..\..\zstd\hist.h" />
|
||||
<ClInclude Include="..\..\..\zstd\huf.h" />
|
||||
<ClInclude Include="..\..\..\zstd\mem.h" />
|
||||
<ClInclude Include="..\..\..\zstd\pool.h" />
|
||||
<ClInclude Include="..\..\..\zstd\threading.h" />
|
||||
<ClInclude Include="..\..\..\zstd\xxhash.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstdmt_compress.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_lazy.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ldm.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_opt.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
|
||||
@@ -10,6 +10,12 @@
|
||||
<Filter Include="common">
|
||||
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd">
|
||||
<UniqueIdentifier>{043ecb94-f240-4986-94b0-bc5bbd415a82}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="getopt">
|
||||
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@@ -30,9 +36,6 @@
|
||||
<ClCompile Include="..\..\src\capture.cpp">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\getopt.c">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
@@ -45,6 +48,90 @@
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\debug.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\entropy_common.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\error_private.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\fse_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\fse_decompress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\hist.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\huf_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\huf_decompress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\pool.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\threading.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\xxhash.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_common.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_literals.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_sequences.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_superblock.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ddict.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress_block.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_double_fast.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_fast.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_lazy.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ldm.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_opt.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstdmt_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c">
|
||||
<Filter>getopt</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@@ -71,9 +158,6 @@
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
@@ -98,18 +182,9 @@
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\src\getopt.h">
|
||||
<Filter>src</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_benaphore.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_sema.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
@@ -122,5 +197,104 @@
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\bitstream.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compiler.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\cpu.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\debug.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\error_private.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\fse.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\hist.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\huf.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\mem.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\pool.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\threading.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\xxhash.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_internal.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_literals.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_sequences.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_superblock.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_cwksp.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ddict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_block.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_internal.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_double_fast.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_fast.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_internal.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_lazy.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ldm.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_opt.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstdmt_compress.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h">
|
||||
<Filter>getopt</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -15,22 +15,19 @@
|
||||
#include "../../server/TracyMemory.hpp"
|
||||
#include "../../server/TracyPrint.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "getopt.h"
|
||||
#include "../../getopt/getopt.h"
|
||||
|
||||
|
||||
#ifndef _MSC_VER
|
||||
struct sigaction oldsigint;
|
||||
bool disconnect = false;
|
||||
|
||||
void SigInt( int )
|
||||
{
|
||||
disconnect = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void Usage()
|
||||
{
|
||||
printf( "Usage: capture -a address -o output.tracy [-p port]\n" );
|
||||
printf( "Usage: capture -o output.tracy [-a address] [-p port]\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
@@ -44,7 +41,7 @@ int main( int argc, char** argv )
|
||||
}
|
||||
#endif
|
||||
|
||||
const char* address = nullptr;
|
||||
const char* address = "localhost";
|
||||
const char* output = nullptr;
|
||||
int port = 8086;
|
||||
|
||||
@@ -95,8 +92,10 @@ int main( int argc, char** argv )
|
||||
while( !worker.HasData() ) std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
|
||||
|
||||
#ifndef _MSC_VER
|
||||
struct sigaction sigint;
|
||||
#ifdef _WIN32
|
||||
signal( SIGINT, SigInt );
|
||||
#else
|
||||
struct sigaction sigint, oldsigint;
|
||||
memset( &sigint, 0, sizeof( sigint ) );
|
||||
sigint.sa_handler = SigInt;
|
||||
sigaction( SIGINT, &sigint, &oldsigint );
|
||||
@@ -104,15 +103,14 @@ int main( int argc, char** argv )
|
||||
|
||||
auto& lock = worker.GetMbpsDataLock();
|
||||
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
while( worker.IsConnected() )
|
||||
{
|
||||
#ifndef _MSC_VER
|
||||
if( disconnect )
|
||||
{
|
||||
worker.Disconnect();
|
||||
disconnect = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
lock.lock();
|
||||
const auto mbps = worker.GetMbpsData().back();
|
||||
@@ -128,16 +126,17 @@ int main( int argc, char** argv )
|
||||
{
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Mbps", mbps );
|
||||
}
|
||||
printf( " \033[0m /\033[36;1m%5.1f%% \033[0m=\033[33;1m%7.2f Mbps \033[0m| \033[33mNet: \033[32m%s \033[0m| \033[33mMem: \033[31;1m%.2f MB\033[0m | \033[33mTime: %s\033[0m",
|
||||
printf( " \033[0m /\033[36;1m%5.1f%% \033[0m=\033[33;1m%7.2f Mbps \033[0m| \033[33mNet: \033[32m%s \033[0m| \033[33mMem: \033[31;1m%s\033[0m | \033[33mTime: %s\033[0m",
|
||||
compRatio * 100.f,
|
||||
mbps / compRatio,
|
||||
tracy::MemSizeToString( netTotal ),
|
||||
tracy::memUsage.load( std::memory_order_relaxed ) / ( 1024.f * 1024.f ),
|
||||
tracy::MemSizeToString( tracy::memUsage ),
|
||||
tracy::TimeToString( worker.GetLastTime() ) );
|
||||
fflush( stdout );
|
||||
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
}
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const auto& failure = worker.GetFailureType();
|
||||
if( failure != tracy::Worker::Failure::None )
|
||||
@@ -145,13 +144,18 @@ int main( int argc, char** argv )
|
||||
printf( "\n\033[31;1mInstrumentation failure: %s\033[0m", tracy::Worker::GetFailureString( failure ) );
|
||||
}
|
||||
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nSaving trace...", worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() ), tracy::RealToString( worker.GetZoneCount(), true ) );
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
|
||||
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() ), tracy::RealToString( worker.GetZoneCount() ),
|
||||
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
|
||||
fflush( stdout );
|
||||
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output ) );
|
||||
if( f )
|
||||
{
|
||||
worker.Write( *f );
|
||||
printf( " \033[32;1mdone!\033[0m\n" );
|
||||
f->Finish();
|
||||
const auto stats = f->GetCompressionStatistics();
|
||||
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined __linux__ && defined __ARM_ARCH
|
||||
|
||||
static const char* DecodeArmImplementer( uint32_t v )
|
||||
{
|
||||
static char buf[16];
|
||||
@@ -12,6 +14,7 @@ static const char* DecodeArmImplementer( uint32_t v )
|
||||
case 0x44: return "DEC";
|
||||
case 0x46: return "Fujitsu";
|
||||
case 0x48: return "HiSilicon";
|
||||
case 0x49: return "Infineon";
|
||||
case 0x4d: return "Motorola";
|
||||
case 0x4e: return "Nvidia";
|
||||
case 0x50: return "Applied Micro";
|
||||
@@ -23,6 +26,7 @@ static const char* DecodeArmImplementer( uint32_t v )
|
||||
case 0x66: return "Faraday";
|
||||
case 0x68: return "HXT";
|
||||
case 0x69: return "Intel";
|
||||
case 0xc0: return "Ampere Computing";
|
||||
default: break;
|
||||
}
|
||||
sprintf( buf, "0x%x", v );
|
||||
@@ -71,6 +75,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xc60: return " Cortex-M0+";
|
||||
case 0xd00: return " AArch64 simulator";
|
||||
case 0xd01: return " Cortex-A32";
|
||||
case 0xd02: return " Cortex-A34";
|
||||
case 0xd03: return " Cortex-A53";
|
||||
case 0xd04: return " Cortex-A35";
|
||||
case 0xd05: return " Cortex-A55";
|
||||
@@ -87,6 +92,10 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xd13: return " Cortex-R52";
|
||||
case 0xd20: return " Cortex-M23";
|
||||
case 0xd21: return " Cortex-M33";
|
||||
case 0xd40: return " Zeus";
|
||||
case 0xd41: return " Cortex-A78";
|
||||
case 0xd43: return " Cortex-A65AE";
|
||||
case 0xd44: return " Cortex-X1";
|
||||
case 0xd4a: return " Neoverse E1";
|
||||
default: break;
|
||||
}
|
||||
@@ -106,6 +115,13 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xa2: return " ThunderX 81XX";
|
||||
case 0xa3: return " ThunderX 83XX";
|
||||
case 0xaf: return " ThunderX2 99xx";
|
||||
case 0xb0: return " OcteonTX2";
|
||||
case 0xb1: return " OcteonTX2 T98";
|
||||
case 0xb2: return " OcteonTX2 T96";
|
||||
case 0xb3: return " OcteonTX2 F95";
|
||||
case 0xb4: return " OcteonTX2 F95N";
|
||||
case 0xb5: return " OcteonTX2 F95MM";
|
||||
case 0xb8: return " ThunderX3 T110";
|
||||
default: break;
|
||||
}
|
||||
case 0x44:
|
||||
@@ -208,6 +224,8 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
return buf;
|
||||
}
|
||||
|
||||
#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
|
||||
|
||||
static const char* DecodeIosDevice( const char* id )
|
||||
{
|
||||
static const char* DeviceTable[] = {
|
||||
@@ -245,6 +263,10 @@ static const char* DecodeIosDevice( const char* id )
|
||||
"iPhone11,4", "iPhone XS Max",
|
||||
"iPhone11,6", "iPhone XS Max China",
|
||||
"iPhone11,8", "iPhone XR",
|
||||
"iPhone12,1", "iPhone 11",
|
||||
"iPhone12,3", "iPhone 11 Pro",
|
||||
"iPhone12,5", "iPhone 11 Pro Max",
|
||||
"iPhone12,8", "iPhone SE 2nd Gen",
|
||||
"iPad1,1", "iPad (A1219/A1337)",
|
||||
"iPad2,1", "iPad 2 (A1395)",
|
||||
"iPad2,2", "iPad 2 (A1396)",
|
||||
@@ -285,6 +307,8 @@ static const char* DecodeIosDevice( const char* id )
|
||||
"iPad7,4", "iPad Pro 10.5\" (A1709)",
|
||||
"iPad7,5", "iPad 6th gen (A1893)",
|
||||
"iPad7,6", "iPad 6th gen (A1954)",
|
||||
"iPad7,11", "iPad 7th gen 10.2\" (Wifi)",
|
||||
"iPad7,12", "iPad 7th gen 10.2\" (Wifi+Cellular)",
|
||||
"iPad8,1", "iPad Pro 11\" (A1980)",
|
||||
"iPad8,2", "iPad Pro 11\" (A1980)",
|
||||
"iPad8,3", "iPad Pro 11\" (A1934/A1979/A2013)",
|
||||
@@ -293,6 +317,10 @@ static const char* DecodeIosDevice( const char* id )
|
||||
"iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)",
|
||||
"iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
|
||||
"iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
|
||||
"iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)",
|
||||
"iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)",
|
||||
"iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)",
|
||||
"iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)",
|
||||
"iPad11,1", "iPad Mini 5th gen (A2133)",
|
||||
"iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)",
|
||||
"iPad11,3", "iPad Air 3rd gen (A2152)",
|
||||
@@ -316,4 +344,6 @@ static const char* DecodeIosDevice( const char* id )
|
||||
return id;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
#include <algorithm>
|
||||
#include <new>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "TracyCallstack.hpp"
|
||||
#include "TracyFastVector.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
|
||||
@@ -9,6 +12,7 @@
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
# include <windows.h>
|
||||
# include <psapi.h>
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning( push )
|
||||
# pragma warning( disable : 4091 )
|
||||
@@ -17,11 +21,11 @@
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning( pop )
|
||||
# endif
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
# include "../libbacktrace/backtrace.hpp"
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
#elif TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 5
|
||||
#elif TRACY_HAS_CALLSTACK == 5
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
#endif
|
||||
@@ -29,14 +33,38 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static inline char* CopyString( const char* src, size_t sz )
|
||||
{
|
||||
assert( strlen( src ) == sz );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
static inline char* CopyString( const char* src )
|
||||
{
|
||||
const auto sz = strlen( src );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
|
||||
enum { MaxCbTrace = 16 };
|
||||
enum { MaxNameSize = 8*1024 };
|
||||
|
||||
int cb_num;
|
||||
CallstackEntry cb_data[MaxCbTrace];
|
||||
|
||||
extern "C" { t_RtlWalkFrameChain RtlWalkFrameChain = 0; }
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
t_RtlWalkFrameChain RtlWalkFrameChain = 0;
|
||||
}
|
||||
|
||||
#if defined __MINGW32__ && API_VERSION_NUMBER < 12
|
||||
extern "C" {
|
||||
@@ -51,26 +79,81 @@ BOOL IMAGEAPI SymGetLineFromInlineContext(HANDLE hProcess, DWORD64 qwAddr, ULONG
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
struct ModuleCache
|
||||
{
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
char* name;
|
||||
};
|
||||
|
||||
static FastVector<ModuleCache>* s_modCache;
|
||||
#endif
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
#ifdef UNICODE
|
||||
RtlWalkFrameChain = (t_RtlWalkFrameChain)GetProcAddress( GetModuleHandle( L"ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
#else
|
||||
RtlWalkFrameChain = (t_RtlWalkFrameChain)GetProcAddress( GetModuleHandle( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
#endif
|
||||
RtlWalkFrameChain = (t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
|
||||
SymInitialize( GetCurrentProcess(), nullptr, true );
|
||||
SymSetOptions( SYMOPT_LOAD_LINES );
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
HMODULE mod[1024];
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
|
||||
s_modCache = (FastVector<ModuleCache>*)tracy_malloc( sizeof( FastVector<ModuleCache> ) );
|
||||
new(s_modCache) FastVector<ModuleCache>( 512 );
|
||||
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
char name[1024];
|
||||
const auto res = GetModuleFileNameA( mod[i], name, 1021 );
|
||||
if( res > 0 )
|
||||
{
|
||||
auto ptr = name + res;
|
||||
while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--;
|
||||
if( ptr > name ) ptr++;
|
||||
const auto namelen = name + res - ptr;
|
||||
auto cache = s_modCache->push_next();
|
||||
cache->start = base;
|
||||
cache->end = base + info.SizeOfImage;
|
||||
cache->name = (char*)tracy_malloc( namelen+3 );
|
||||
cache->name[0] = '[';
|
||||
memcpy( cache->name+1, ptr, namelen );
|
||||
cache->name[namelen+1] = ']';
|
||||
cache->name[namelen+2] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TRACY_API uintptr_t* CallTrace( int depth )
|
||||
{
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
|
||||
*trace = num;
|
||||
return trace;
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
{
|
||||
static char ret[1024];
|
||||
static char ret[MaxNameSize];
|
||||
const auto proc = GetCurrentProcess();
|
||||
|
||||
char buf[sizeof( SYMBOL_INFO ) + 1024];
|
||||
char buf[sizeof( SYMBOL_INFO ) + MaxNameSize];
|
||||
auto si = (SYMBOL_INFO*)buf;
|
||||
si->SizeOfStruct = sizeof( SYMBOL_INFO );
|
||||
si->MaxNameLen = 1024;
|
||||
si->MaxNameLen = MaxNameSize;
|
||||
|
||||
if( SymFromAddr( proc, ptr, nullptr, si ) == 0 )
|
||||
{
|
||||
@@ -84,12 +167,131 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char* GetModuleName( uint64_t addr )
|
||||
{
|
||||
if( ( addr & 0x8000000000000000 ) != 0 ) return "[kernel]";
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
for( auto& v : *s_modCache )
|
||||
{
|
||||
if( addr >= v.start && addr < v.end )
|
||||
{
|
||||
return v.name;
|
||||
}
|
||||
}
|
||||
|
||||
HMODULE mod[1024];
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
if( addr >= base && addr < base + info.SizeOfImage )
|
||||
{
|
||||
char name[1024];
|
||||
const auto res = GetModuleFileNameA( mod[i], name, 1021 );
|
||||
if( res > 0 )
|
||||
{
|
||||
auto ptr = name + res;
|
||||
while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--;
|
||||
if( ptr > name ) ptr++;
|
||||
const auto namelen = name + res - ptr;
|
||||
auto cache = s_modCache->push_next();
|
||||
cache->start = base;
|
||||
cache->end = base + info.SizeOfImage;
|
||||
cache->name = (char*)tracy_malloc( namelen+3 );
|
||||
cache->name[0] = '[';
|
||||
memcpy( cache->name+1, ptr, namelen );
|
||||
cache->name[namelen+1] = ']';
|
||||
cache->name[namelen+2] = '\0';
|
||||
return cache->name;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return "[unknown]";
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
|
||||
{
|
||||
CallstackSymbolData sym;
|
||||
IMAGEHLP_LINE64 line;
|
||||
DWORD displacement = 0;
|
||||
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
|
||||
if( SymGetLineFromAddr64( GetCurrentProcess(), ptr, &displacement, &line ) == 0 )
|
||||
{
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym.file = line.FileName;
|
||||
sym.line = line.LineNumber;
|
||||
}
|
||||
sym.needFree = false;
|
||||
return sym;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
|
||||
{
|
||||
CallstackSymbolData sym;
|
||||
const auto proc = GetCurrentProcess();
|
||||
bool done = false;
|
||||
|
||||
IMAGEHLP_LINE64 line;
|
||||
DWORD displacement = 0;
|
||||
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
DWORD inlineNum = SymAddrIncludeInlineTrace( proc, ptr );
|
||||
DWORD ctx = 0;
|
||||
DWORD idx;
|
||||
BOOL doInline = FALSE;
|
||||
if( inlineNum != 0 ) doInline = SymQueryInlineTrace( proc, ptr, 0, ptr, ptr, &ctx, &idx );
|
||||
if( doInline )
|
||||
{
|
||||
if( SymGetLineFromInlineContext( proc, ptr, ctx, 0, &displacement, &line ) != 0 )
|
||||
{
|
||||
sym.file = line.FileName;
|
||||
sym.line = line.LineNumber;
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if( !done )
|
||||
{
|
||||
if( SymGetLineFromAddr64( proc, ptr, &displacement, &line ) == 0 )
|
||||
{
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym.file = line.FileName;
|
||||
sym.line = line.LineNumber;
|
||||
}
|
||||
}
|
||||
sym.needFree = false;
|
||||
return sym;
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
int write;
|
||||
const auto proc = GetCurrentProcess();
|
||||
#ifndef __CYGWIN__
|
||||
const auto inlineNum = std::min<DWORD>( MaxCbTrace - 1, SymAddrIncludeInlineTrace( proc, ptr ) );
|
||||
DWORD inlineNum = SymAddrIncludeInlineTrace( proc, ptr );
|
||||
if( inlineNum > MaxCbTrace - 1 ) inlineNum = MaxCbTrace - 1;
|
||||
DWORD ctx = 0;
|
||||
DWORD idx;
|
||||
BOOL doInline = FALSE;
|
||||
@@ -106,30 +308,21 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
cb_num = 1;
|
||||
}
|
||||
|
||||
char buf[sizeof( SYMBOL_INFO ) + 1024];
|
||||
char buf[sizeof( SYMBOL_INFO ) + MaxNameSize];
|
||||
auto si = (SYMBOL_INFO*)buf;
|
||||
si->SizeOfStruct = sizeof( SYMBOL_INFO );
|
||||
si->MaxNameLen = 1024;
|
||||
si->MaxNameLen = MaxNameSize;
|
||||
|
||||
if( SymFromAddr( proc, ptr, nullptr, si ) == 0 )
|
||||
{
|
||||
memcpy( si->Name, "[unknown]", 10 );
|
||||
si->NameLen = 9;
|
||||
}
|
||||
const auto moduleName = GetModuleName( ptr );
|
||||
const auto symValid = SymFromAddr( proc, ptr, nullptr, si ) != 0;
|
||||
|
||||
IMAGEHLP_LINE64 line;
|
||||
DWORD displacement = 0;
|
||||
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
|
||||
|
||||
{
|
||||
auto name = (char*)tracy_malloc(si->NameLen + 1);
|
||||
memcpy(name, si->Name, si->NameLen);
|
||||
name[si->NameLen] = '\0';
|
||||
|
||||
cb_data[write].name = name;
|
||||
|
||||
const char* filename;
|
||||
if (SymGetLineFromAddr64(proc, ptr, &displacement, &line) == 0)
|
||||
if( SymGetLineFromAddr64( proc, ptr, &displacement, &line ) == 0 )
|
||||
{
|
||||
filename = "[unknown]";
|
||||
cb_data[write].line = 0;
|
||||
@@ -140,12 +333,18 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
cb_data[write].line = line.LineNumber;
|
||||
}
|
||||
|
||||
const auto fsz = strlen(filename);
|
||||
auto file = (char*)tracy_malloc(fsz + 1);
|
||||
memcpy(file, filename, fsz);
|
||||
file[fsz] = '\0';
|
||||
|
||||
cb_data[write].file = file;
|
||||
cb_data[write].name = symValid ? CopyString( si->Name, si->NameLen ) : CopyString( moduleName );
|
||||
cb_data[write].file = CopyString( filename );
|
||||
if( symValid )
|
||||
{
|
||||
cb_data[write].symLen = si->Size;
|
||||
cb_data[write].symAddr = si->Address;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb_data[write].symLen = 0;
|
||||
cb_data[write].symAddr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
@@ -154,18 +353,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
for( DWORD i=0; i<inlineNum; i++ )
|
||||
{
|
||||
auto& cb = cb_data[i];
|
||||
|
||||
if( SymFromInlineContext( proc, ptr, ctx, nullptr, si ) == 0 )
|
||||
{
|
||||
memcpy( si->Name, "[unknown]", 10 );
|
||||
si->NameLen = 9;
|
||||
}
|
||||
|
||||
auto name = (char*)tracy_malloc( si->NameLen + 1 );
|
||||
memcpy( name, si->Name, si->NameLen );
|
||||
name[si->NameLen] = '\0';
|
||||
cb.name = name;
|
||||
|
||||
const auto symInlineValid = SymFromInlineContext( proc, ptr, ctx, nullptr, si ) != 0;
|
||||
const char* filename;
|
||||
if( SymGetLineFromInlineContext( proc, ptr, ctx, 0, &displacement, &line ) == 0 )
|
||||
{
|
||||
@@ -178,162 +366,42 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
cb.line = line.LineNumber;
|
||||
}
|
||||
|
||||
const auto fsz = strlen( filename );
|
||||
auto file = (char*)tracy_malloc( fsz + 1 );
|
||||
memcpy( file, filename, fsz );
|
||||
file[fsz] = '\0';
|
||||
cb.file = file;
|
||||
cb.name = symInlineValid ? CopyString( si->Name, si->NameLen ) : CopyString( moduleName );
|
||||
cb.file = CopyString( filename );
|
||||
if( symInlineValid )
|
||||
{
|
||||
cb.symLen = si->Size;
|
||||
cb.symAddr = si->Address;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.symLen = 0;
|
||||
cb.symAddr = 0;
|
||||
}
|
||||
|
||||
ctx++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return { cb_data, uint8_t( cb_num ) };
|
||||
return { cb_data, uint8_t( cb_num ), moduleName };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 4
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
{
|
||||
static char ret[1024];
|
||||
auto vptr = (void*)ptr;
|
||||
char** sym = nullptr;
|
||||
const char* symname = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) && dlinfo.dli_sname )
|
||||
{
|
||||
symname = dlinfo.dli_sname;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym = backtrace_symbols( &vptr, 1 );
|
||||
if( sym )
|
||||
{
|
||||
symname = *sym;
|
||||
}
|
||||
}
|
||||
if( symname )
|
||||
{
|
||||
strcpy( ret, symname );
|
||||
}
|
||||
else
|
||||
{
|
||||
*ret = '\0';
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
static CallstackEntry cb;
|
||||
cb.line = 0;
|
||||
|
||||
char* demangled = nullptr;
|
||||
const char* symname = nullptr;
|
||||
const char* symloc = nullptr;
|
||||
auto vptr = (void*)ptr;
|
||||
char** sym = nullptr;
|
||||
ptrdiff_t symoff = 0;
|
||||
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) )
|
||||
{
|
||||
symloc = dlinfo.dli_fname;
|
||||
symname = dlinfo.dli_sname;
|
||||
symoff = (char*)ptr - (char*)dlinfo.dli_saddr;
|
||||
|
||||
if( symname && symname[0] == '_' )
|
||||
{
|
||||
size_t len = 0;
|
||||
int status;
|
||||
demangled = abi::__cxa_demangle( symname, nullptr, &len, &status );
|
||||
if( status == 0 )
|
||||
{
|
||||
symname = demangled;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( !symname )
|
||||
{
|
||||
sym = backtrace_symbols( &vptr, 1 );
|
||||
if( !sym )
|
||||
{
|
||||
symname = "[unknown]";
|
||||
}
|
||||
else
|
||||
{
|
||||
symname = *sym;
|
||||
}
|
||||
}
|
||||
if( !symloc )
|
||||
{
|
||||
symloc = "[unknown]";
|
||||
}
|
||||
|
||||
if( symoff == 0 )
|
||||
{
|
||||
const auto namelen = strlen( symname );
|
||||
auto name = (char*)tracy_malloc( namelen + 1 );
|
||||
memcpy( name, symname, namelen );
|
||||
name[namelen] = '\0';
|
||||
cb.name = name;
|
||||
}
|
||||
else
|
||||
{
|
||||
char buf[32];
|
||||
const auto offlen = sprintf( buf, " + %td", symoff );
|
||||
const auto namelen = strlen( symname );
|
||||
auto name = (char*)tracy_malloc( namelen + offlen + 1 );
|
||||
memcpy( name, symname, namelen );
|
||||
memcpy( name + namelen, buf, offlen );
|
||||
name[namelen + offlen] = '\0';
|
||||
cb.name = name;
|
||||
}
|
||||
|
||||
char buf[32];
|
||||
const auto addrlen = sprintf( buf, " [%p]", (void*)ptr );
|
||||
const auto loclen = strlen( symloc );
|
||||
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
|
||||
memcpy( loc, symloc, loclen );
|
||||
memcpy( loc + loclen, buf, addrlen );
|
||||
loc[loclen + addrlen] = '\0';
|
||||
cb.file = loc;
|
||||
|
||||
if( sym ) free( sym );
|
||||
if( demangled ) free( demangled );
|
||||
|
||||
return { &cb, 1 };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
|
||||
enum { MaxCbTrace = 16 };
|
||||
|
||||
struct backtrace_state* cb_bts;
|
||||
int cb_num;
|
||||
CallstackEntry cb_data[MaxCbTrace];
|
||||
int cb_fixup;
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
cb_bts = backtrace_create_state( nullptr, 0, nullptr, nullptr );
|
||||
}
|
||||
|
||||
static inline char* CopyString( const char* src )
|
||||
{
|
||||
const auto sz = strlen( src );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
static int FastCallstackDataCb( void* data, uintptr_t pc, const char* fn, int lineno, const char* function )
|
||||
static int FastCallstackDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
|
||||
{
|
||||
if( function )
|
||||
{
|
||||
@@ -372,22 +440,62 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int CallstackDataCb( void* /*data*/, uintptr_t pc, const char* fn, int lineno, const char* function )
|
||||
static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
|
||||
{
|
||||
auto& sym = *(CallstackSymbolData*)data;
|
||||
if( !fn )
|
||||
{
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
sym.needFree = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym.file = CopyString( fn );
|
||||
sym.line = lineno;
|
||||
sym.needFree = true;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ )
|
||||
{
|
||||
auto& sym = *(CallstackSymbolData*)data;
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
sym.needFree = false;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
|
||||
{
|
||||
CallstackSymbolData sym;
|
||||
backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym );
|
||||
return sym;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
|
||||
{
|
||||
return DecodeSymbolAddress( ptr );
|
||||
}
|
||||
|
||||
static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
|
||||
{
|
||||
enum { DemangleBufLen = 64*1024 };
|
||||
char demangled[DemangleBufLen];
|
||||
|
||||
cb_data[cb_num].symLen = 0;
|
||||
cb_data[cb_num].symAddr = (uint64_t)lowaddr;
|
||||
|
||||
if( !fn && !function )
|
||||
{
|
||||
const char* symname = nullptr;
|
||||
const char* symloc = nullptr;
|
||||
auto vptr = (void*)pc;
|
||||
ptrdiff_t symoff = 0;
|
||||
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) )
|
||||
{
|
||||
symloc = dlinfo.dli_fname;
|
||||
symname = dlinfo.dli_sname;
|
||||
symoff = (char*)pc - (char*)dlinfo.dli_saddr;
|
||||
|
||||
@@ -404,7 +512,6 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, const char* fn, int li
|
||||
}
|
||||
|
||||
if( !symname ) symname = "[unknown]";
|
||||
if( !symloc ) symloc = "[unknown]";
|
||||
|
||||
if( symoff == 0 )
|
||||
{
|
||||
@@ -422,15 +529,7 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, const char* fn, int li
|
||||
cb_data[cb_num].name = name;
|
||||
}
|
||||
|
||||
char buf[32];
|
||||
const auto addrlen = sprintf( buf, " [%p]", (void*)pc );
|
||||
const auto loclen = strlen( symloc );
|
||||
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
|
||||
memcpy( loc, symloc, loclen );
|
||||
memcpy( loc + loclen, buf, addrlen );
|
||||
loc[loclen + addrlen] = '\0';
|
||||
cb_data[cb_num].file = loc;
|
||||
|
||||
cb_data[cb_num].file = CopyString( "[unknown]" );
|
||||
cb_data[cb_num].line = 0;
|
||||
}
|
||||
else
|
||||
@@ -484,12 +583,31 @@ static void CallstackErrorCb( void* /*data*/, const char* /*msg*/, int /*errnum*
|
||||
cb_num = 1;
|
||||
}
|
||||
|
||||
void SymInfoCallback( void* /*data*/, uintptr_t pc, const char* symname, uintptr_t symval, uintptr_t symsize )
|
||||
{
|
||||
cb_data[cb_num-1].symLen = (uint32_t)symsize;
|
||||
cb_data[cb_num-1].symAddr = (uint64_t)symval;
|
||||
}
|
||||
|
||||
void SymInfoError( void* /*data*/, const char* /*msg*/, int /*errnum*/ )
|
||||
{
|
||||
cb_data[cb_num-1].symLen = 0;
|
||||
cb_data[cb_num-1].symAddr = 0;
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
cb_num = 0;
|
||||
backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr );
|
||||
assert( cb_num > 0 );
|
||||
return { cb_data, uint8_t( cb_num ) };
|
||||
|
||||
backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr );
|
||||
|
||||
const char* symloc = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname;
|
||||
|
||||
return { cb_data, uint8_t( cb_num ), symloc ? symloc : "[unknown]" };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 5
|
||||
@@ -502,7 +620,6 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
{
|
||||
static char ret[1024];
|
||||
auto vptr = (void*)ptr;
|
||||
char** sym = nullptr;
|
||||
const char* symname = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) && dlinfo.dli_sname )
|
||||
@@ -520,6 +637,20 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
return ret;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
|
||||
{
|
||||
const char* symloc = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname;
|
||||
if( !symloc ) symloc = "[unknown]";
|
||||
return CallstackSymbolData { symloc, 0, false };
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
|
||||
{
|
||||
return DecodeSymbolAddress( ptr );
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
static CallstackEntry cb;
|
||||
@@ -529,8 +660,8 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
const char* symname = nullptr;
|
||||
const char* symloc = nullptr;
|
||||
auto vptr = (void*)ptr;
|
||||
char** sym = nullptr;
|
||||
ptrdiff_t symoff = 0;
|
||||
void* symaddr = nullptr;
|
||||
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) )
|
||||
@@ -538,6 +669,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
symloc = dlinfo.dli_fname;
|
||||
symname = dlinfo.dli_sname;
|
||||
symoff = (char*)ptr - (char*)dlinfo.dli_saddr;
|
||||
symaddr = dlinfo.dli_saddr;
|
||||
|
||||
if( symname && symname[0] == '_' )
|
||||
{
|
||||
@@ -551,22 +683,12 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
}
|
||||
}
|
||||
|
||||
if( !symname )
|
||||
{
|
||||
symname = "[unknown]";
|
||||
}
|
||||
if( !symloc )
|
||||
{
|
||||
symloc = "[unknown]";
|
||||
}
|
||||
if( !symname ) symname = "[unknown]";
|
||||
if( !symloc ) symloc = "[unknown]";
|
||||
|
||||
if( symoff == 0 )
|
||||
{
|
||||
const auto namelen = strlen( symname );
|
||||
auto name = (char*)tracy_malloc( namelen + 1 );
|
||||
memcpy( name, symname, namelen );
|
||||
name[namelen] = '\0';
|
||||
cb.name = name;
|
||||
cb.name = CopyString( symname );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -580,19 +702,13 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
cb.name = name;
|
||||
}
|
||||
|
||||
char buf[32];
|
||||
const auto addrlen = sprintf( buf, " [%p]", (void*)ptr );
|
||||
const auto loclen = strlen( symloc );
|
||||
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
|
||||
memcpy( loc, symloc, loclen );
|
||||
memcpy( loc + loclen, buf, addrlen );
|
||||
loc[loclen + addrlen] = '\0';
|
||||
cb.file = loc;
|
||||
cb.file = CopyString( "[unknown]" );
|
||||
cb.symLen = 0;
|
||||
cb.symAddr = (uint64_t)symaddr;
|
||||
|
||||
if( sym ) free( sym );
|
||||
if( demangled ) free( demangled );
|
||||
|
||||
return { &cb, 1 };
|
||||
return { &cb, 1, symloc };
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
#ifndef __TRACYCALLSTACK_H__
|
||||
#define __TRACYCALLSTACK_H__
|
||||
|
||||
#if !defined _WIN32 && !defined __CYGWIN__
|
||||
# include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# define TRACY_HAS_CALLSTACK 1
|
||||
#elif defined __ANDROID__
|
||||
@@ -17,6 +21,8 @@
|
||||
# endif
|
||||
#elif defined __APPLE__
|
||||
# define TRACY_HAS_CALLSTACK 4
|
||||
#elif defined BSD
|
||||
# define TRACY_HAS_CALLSTACK 6
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
#ifndef __TRACYCALLSTACK_HPP__
|
||||
#define __TRACYCALLSTACK_HPP__
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
#include "TracyCallstack.h"
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
extern t_RtlWalkFrameChain RtlWalkFrameChain;
|
||||
}
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
# include <unwind.h>
|
||||
#elif TRACY_HAS_CALLSTACK >= 3
|
||||
# include <execinfo.h>
|
||||
@@ -20,7 +15,6 @@ extern "C"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
@@ -28,34 +22,43 @@ extern "C"
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct CallstackSymbolData
|
||||
{
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
bool needFree;
|
||||
};
|
||||
|
||||
struct CallstackEntry
|
||||
{
|
||||
const char* name;
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
uint32_t symLen;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct CallstackEntryData
|
||||
{
|
||||
const CallstackEntry* data;
|
||||
uint8_t size;
|
||||
const char* imageName;
|
||||
};
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr );
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr );
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr );
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr );
|
||||
void InitCallstack();
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
|
||||
TRACY_API uintptr_t* CallTrace( int depth );
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
|
||||
*trace = num;
|
||||
|
||||
return trace;
|
||||
return CallTrace( depth );
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
@@ -91,7 +94,7 @@ static tracy_force_inline void* Callstack( int depth )
|
||||
return trace;
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4
|
||||
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
|
||||
@@ -18,10 +18,8 @@
|
||||
# include <intrin.h>
|
||||
# else
|
||||
# include <x86intrin.h>
|
||||
# ifdef __CYGWIN__
|
||||
# ifndef _mm256_cvtsi256_si32
|
||||
# define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
|
||||
# endif
|
||||
# ifndef _mm256_cvtsi256_si32
|
||||
# define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
@@ -92,56 +90,6 @@ static const uint16_t DivTable[255*3+1] = {
|
||||
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
|
||||
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
|
||||
};
|
||||
static const uint16_t DivTableAVX[255*3+1] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000,
|
||||
0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555,
|
||||
0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000,
|
||||
0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc,
|
||||
0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa,
|
||||
0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924,
|
||||
0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800,
|
||||
0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c,
|
||||
0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666,
|
||||
0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1,
|
||||
0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555,
|
||||
0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec,
|
||||
0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492,
|
||||
0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444,
|
||||
0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400,
|
||||
0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3,
|
||||
0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e,
|
||||
0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e,
|
||||
0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333,
|
||||
0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c,
|
||||
0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8,
|
||||
0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8,
|
||||
0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa,
|
||||
0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f,
|
||||
0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276,
|
||||
0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e,
|
||||
0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249,
|
||||
0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234,
|
||||
0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222,
|
||||
0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210,
|
||||
0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200,
|
||||
0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0,
|
||||
0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1,
|
||||
0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4,
|
||||
0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7,
|
||||
0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba,
|
||||
0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af,
|
||||
0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4,
|
||||
0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199,
|
||||
0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f,
|
||||
0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186,
|
||||
0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d,
|
||||
0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174,
|
||||
0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c,
|
||||
0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164,
|
||||
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
|
||||
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
|
||||
};
|
||||
static const uint16_t DivTableNEON[255*3+1] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000,
|
||||
@@ -224,6 +172,12 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
}
|
||||
|
||||
__m128i amask = _mm_set1_epi32( 0xFFFFFF );
|
||||
px0 = _mm_and_si128( px0, amask );
|
||||
px1 = _mm_and_si128( px1, amask );
|
||||
px2 = _mm_and_si128( px2, amask );
|
||||
px3 = _mm_and_si128( px3, amask );
|
||||
|
||||
__m128i min0 = _mm_min_epu8( px0, px1 );
|
||||
__m128i min1 = _mm_min_epu8( px2, px3 );
|
||||
__m128i min2 = _mm_min_epu8( min0, min1 );
|
||||
@@ -455,19 +409,20 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
|
||||
# endif
|
||||
#else
|
||||
const auto ref = to565( src[0], src[1], src[2] );
|
||||
uint32_t ref;
|
||||
memcpy( &ref, src, 4 );
|
||||
uint32_t refMask = ref & 0xF8FCF8;
|
||||
auto stmp = src + 4;
|
||||
for( int i=1; i<16; i++ )
|
||||
{
|
||||
if( to565( stmp[0], stmp[1], stmp[2] ) != ref )
|
||||
{
|
||||
break;
|
||||
}
|
||||
uint32_t px;
|
||||
memcpy( &px, stmp, 4 );
|
||||
if( ( px & 0xF8FCF8 ) != refMask ) break;
|
||||
stmp += 4;
|
||||
}
|
||||
if( stmp == src + 64 )
|
||||
{
|
||||
return uint64_t( ref ) << 16;
|
||||
return uint64_t( to565( ref ) ) << 16;
|
||||
}
|
||||
|
||||
uint8_t min[3] = { src[0], src[1], src[2] };
|
||||
@@ -513,6 +468,42 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
__m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2);
|
||||
__m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3);
|
||||
|
||||
__m256i smask = _mm256_set1_epi32( 0xF8FCF8 );
|
||||
__m256i sd0 = _mm256_and_si256( px0, smask );
|
||||
__m256i sd1 = _mm256_and_si256( px1, smask );
|
||||
__m256i sd2 = _mm256_and_si256( px2, smask );
|
||||
__m256i sd3 = _mm256_and_si256( px3, smask );
|
||||
|
||||
__m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m256i sc0 = _mm256_cmpeq_epi8( sd0, sc );
|
||||
__m256i sc1 = _mm256_cmpeq_epi8( sd1, sc );
|
||||
__m256i sc2 = _mm256_cmpeq_epi8( sd2, sc );
|
||||
__m256i sc3 = _mm256_cmpeq_epi8( sd3, sc );
|
||||
|
||||
__m256i sm0 = _mm256_and_si256( sc0, sc1 );
|
||||
__m256i sm1 = _mm256_and_si256( sc2, sc3 );
|
||||
__m256i sm = _mm256_and_si256( sm0, sm1 );
|
||||
|
||||
const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) );
|
||||
const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) );
|
||||
|
||||
if( solid0 + solid1 == 0 )
|
||||
{
|
||||
const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) ) << 16;
|
||||
memcpy( dst, &c0, 8 );
|
||||
memcpy( dst+8, &c1, 8 );
|
||||
dst += 16;
|
||||
return;
|
||||
}
|
||||
|
||||
__m256i amask = _mm256_set1_epi32( 0xFFFFFF );
|
||||
px0 = _mm256_and_si256( px0, amask );
|
||||
px1 = _mm256_and_si256( px1, amask );
|
||||
px2 = _mm256_and_si256( px2, amask );
|
||||
px3 = _mm256_and_si256( px3, amask );
|
||||
|
||||
__m256i min0 = _mm256_min_epu8( px0, px1 );
|
||||
__m256i min1 = _mm256_min_epu8( px2, px3 );
|
||||
__m256i min2 = _mm256_min_epu8( min0, min1 );
|
||||
@@ -534,8 +525,8 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
__m256i range1 = _mm256_subs_epu8( rmax, rmin );
|
||||
__m256i range2 = _mm256_sad_epu8( rmax, rmin );
|
||||
|
||||
uint16_t vrange0 = DivTableAVX[_mm256_cvtsi256_si32( range2 ) >> 1];
|
||||
uint16_t vrange1 = DivTableAVX[_mm256_extract_epi16( range2, 8 ) >> 1];
|
||||
uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1];
|
||||
uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1];
|
||||
__m256i range00 = _mm256_set1_epi16( vrange0 );
|
||||
__m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 );
|
||||
|
||||
@@ -579,7 +570,11 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
|
||||
__m256i d0 = _mm256_unpacklo_epi32( mm5, p );
|
||||
__m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
|
||||
_mm_storeu_si128( (__m128i*)dst, _mm256_castsi256_si128( d1 ) );
|
||||
__m128i d2 = _mm256_castsi256_si128( d1 );
|
||||
|
||||
__m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 );
|
||||
__m128i d3 = _mm_and_si128( d2, mask );
|
||||
_mm_storeu_si128( (__m128i*)dst, d3 );
|
||||
dst += 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef __TRACYFASTVECTOR_HPP__
|
||||
#define __TRACYFASTVECTOR_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
@@ -21,6 +22,7 @@ public:
|
||||
, m_write( m_ptr )
|
||||
, m_end( m_ptr + capacity )
|
||||
{
|
||||
assert( capacity != 0 );
|
||||
}
|
||||
|
||||
FastVector( const FastVector& ) = delete;
|
||||
|
||||
@@ -23,10 +23,7 @@ public:
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
MemWrite( &item->lockAnnounce.id, m_id );
|
||||
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
|
||||
@@ -35,7 +32,7 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
LockableCtx( const LockableCtx& ) = delete;
|
||||
@@ -43,18 +40,14 @@ public:
|
||||
|
||||
tracy_force_inline ~LockableCtx()
|
||||
{
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockTerminate );
|
||||
MemWrite( &item->lockTerminate.id, m_id );
|
||||
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockTerminate.type, LockType::Lockable );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLock()
|
||||
@@ -77,7 +70,6 @@ public:
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockWait.type, LockType::Lockable );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
@@ -161,6 +153,22 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, name, size );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockName );
|
||||
MemWrite( &item->lockNameFat.id, m_id );
|
||||
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
|
||||
MemWrite( &item->lockNameFat.size, (uint16_t)size );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_id;
|
||||
|
||||
@@ -207,6 +215,11 @@ public:
|
||||
m_ctx.Mark( srcloc );
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
LockableCtx m_ctx;
|
||||
@@ -225,21 +238,16 @@ public:
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
MemWrite( &item->lockAnnounce.id, m_id );
|
||||
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
SharedLockableCtx( const SharedLockableCtx& ) = delete;
|
||||
@@ -247,20 +255,14 @@ public:
|
||||
|
||||
tracy_force_inline ~SharedLockableCtx()
|
||||
{
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockTerminate );
|
||||
MemWrite( &item->lockTerminate.id, m_id );
|
||||
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockTerminate.type, LockType::SharedLockable );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLock()
|
||||
@@ -283,7 +285,6 @@ public:
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockWait.type, LockType::SharedLockable );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
@@ -366,7 +367,6 @@ public:
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockWait.type, LockType::SharedLockable );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
@@ -450,6 +450,22 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, name, size );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockName );
|
||||
MemWrite( &item->lockNameFat.id, m_id );
|
||||
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
|
||||
MemWrite( &item->lockNameFat.size, (uint16_t)size );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_id;
|
||||
|
||||
@@ -516,12 +532,17 @@ public:
|
||||
m_ctx.Mark( srcloc );
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
SharedLockableCtx m_ctx;
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,20 +3,19 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "tracy_concurrentqueue.h"
|
||||
#include "TracyCallstack.hpp"
|
||||
#include "TracySysTime.hpp"
|
||||
#include "TracySysTrace.hpp"
|
||||
#include "TracyFastVector.hpp"
|
||||
#include "../common/TracyQueue.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyMutex.hpp"
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "../common/TracyProtocol.hpp"
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <intrin.h>
|
||||
@@ -26,10 +25,14 @@
|
||||
# include <mach/mach_time.h>
|
||||
#endif
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6
|
||||
#if defined _WIN32 || defined __CYGWIN__ || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 )
|
||||
# define TRACY_HW_TIMER
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_HW_TIMER
|
||||
#include <chrono>
|
||||
#endif
|
||||
|
||||
#ifndef TracyConcat
|
||||
# define TracyConcat(x,y) TracyConcatIndirect(x,y)
|
||||
#endif
|
||||
@@ -39,6 +42,10 @@
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME)
|
||||
void StartupProfiler();
|
||||
void ShutdownProfiler();
|
||||
#endif
|
||||
|
||||
class GpuCtx;
|
||||
class Profiler;
|
||||
@@ -56,8 +63,9 @@ TRACY_API std::atomic<uint32_t>& GetLockCounter();
|
||||
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
|
||||
TRACY_API GpuCtxWrapper& GetGpuCtx();
|
||||
TRACY_API uint64_t GetThreadHandle();
|
||||
|
||||
TRACY_API void InitRPMallocThread();
|
||||
TRACY_API bool ProfilerAvailable();
|
||||
TRACY_API int64_t GetFrequencyQpc();
|
||||
|
||||
struct SourceLocationData
|
||||
{
|
||||
@@ -76,15 +84,36 @@ struct LuaZoneState
|
||||
};
|
||||
#endif
|
||||
|
||||
using Magic = moodycamel::ConcurrentQueueDefaultTraits::index_t;
|
||||
|
||||
#define TracyLfqPrepare( _type ) \
|
||||
moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = GetToken(); \
|
||||
auto& __tail = __token->get_tail_index(); \
|
||||
auto item = __token->enqueue_begin( __magic ); \
|
||||
MemWrite( &item->hdr.type, _type );
|
||||
|
||||
#define TracyLfqCommit \
|
||||
__tail.store( __magic + 1, std::memory_order_release );
|
||||
|
||||
#define TracyLfqPrepareC( _type ) \
|
||||
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = tracy::GetToken(); \
|
||||
auto& __tail = __token->get_tail_index(); \
|
||||
auto item = __token->enqueue_begin( __magic ); \
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
|
||||
#define TracyLfqCommitC \
|
||||
__tail.store( __magic + 1, std::memory_order_release );
|
||||
|
||||
|
||||
typedef void(*ParameterCallback)( uint32_t idx, int32_t val );
|
||||
|
||||
class Profiler
|
||||
{
|
||||
struct FrameImageQueueItem
|
||||
{
|
||||
void* image;
|
||||
uint64_t frame;
|
||||
uint32_t frame;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
uint8_t offset;
|
||||
@@ -95,21 +124,19 @@ public:
|
||||
Profiler();
|
||||
~Profiler();
|
||||
|
||||
void SpawnWorkerThreads();
|
||||
|
||||
static tracy_force_inline int64_t GetTime()
|
||||
{
|
||||
#ifdef TRACY_HW_TIMER
|
||||
# if TARGET_OS_IOS == 1
|
||||
# if defined TARGET_OS_IOS && TARGET_OS_IOS == 1
|
||||
return mach_absolute_time();
|
||||
# elif __ARM_ARCH >= 6
|
||||
# ifdef CLOCK_MONOTONIC_RAW
|
||||
struct timespec ts;
|
||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
|
||||
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
|
||||
# else
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
# endif
|
||||
# elif defined _WIN32 || defined __CYGWIN__
|
||||
# ifdef TRACY_TIMER_QPC
|
||||
return GetTimeQpc();
|
||||
# else
|
||||
return int64_t( __rdtsc() );
|
||||
# endif
|
||||
# elif defined __i386 || defined _M_IX86
|
||||
uint32_t eax, edx;
|
||||
asm volatile ( "rdtsc" : "=a" (eax), "=d" (edx) );
|
||||
@@ -118,9 +145,17 @@ public:
|
||||
uint64_t rax, rdx;
|
||||
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
|
||||
return ( rdx << 32 ) + rax;
|
||||
# else
|
||||
# error "TRACY_HW_TIMER detection logic needs fixing"
|
||||
# endif
|
||||
#else
|
||||
# if defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
struct timespec ts;
|
||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
|
||||
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
|
||||
# else
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -149,14 +184,10 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::FrameMarkMsg );
|
||||
TracyLfqPrepare( QueueType::FrameMarkMsg );
|
||||
MemWrite( &item->frameMark.time, GetTime() );
|
||||
MemWrite( &item->frameMark.name, uint64_t( name ) );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendFrameMark( const char* name, QueueType type )
|
||||
@@ -175,6 +206,7 @@ public:
|
||||
static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
#endif
|
||||
@@ -185,7 +217,7 @@ public:
|
||||
profiler.m_fiLock.lock();
|
||||
auto fi = profiler.m_fiQueue.prepare_next();
|
||||
fi->image = ptr;
|
||||
fi->frame = profiler.m_frameCount.load( std::memory_order_relaxed ) - offset;
|
||||
fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset );
|
||||
fi->w = w;
|
||||
fi->h = h;
|
||||
fi->flip = flip;
|
||||
@@ -198,16 +230,12 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::PlotData );
|
||||
TracyLfqPrepare( QueueType::PlotData );
|
||||
MemWrite( &item->plotData.name, (uint64_t)name );
|
||||
MemWrite( &item->plotData.time, GetTime() );
|
||||
MemWrite( &item->plotData.type, PlotDataType::Int );
|
||||
MemWrite( &item->plotData.data.i, val );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, float val )
|
||||
@@ -215,16 +243,12 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::PlotData );
|
||||
TracyLfqPrepare( QueueType::PlotData );
|
||||
MemWrite( &item->plotData.name, (uint64_t)name );
|
||||
MemWrite( &item->plotData.time, GetTime() );
|
||||
MemWrite( &item->plotData.type, PlotDataType::Float );
|
||||
MemWrite( &item->plotData.data.f, val );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, double val )
|
||||
@@ -232,112 +256,114 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::PlotData );
|
||||
TracyLfqPrepare( QueueType::PlotData );
|
||||
MemWrite( &item->plotData.name, (uint64_t)name );
|
||||
MemWrite( &item->plotData.time, GetTime() );
|
||||
MemWrite( &item->plotData.type, PlotDataType::Double );
|
||||
MemWrite( &item->plotData.data.d, val );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size )
|
||||
static tracy_force_inline void ConfigurePlot( const char* name, PlotFormatType type )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::Message );
|
||||
MemWrite( &item->message.time, GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::MessageLiteral );
|
||||
MemWrite( &item->message.time, GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)txt );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::MessageColor );
|
||||
MemWrite( &item->messageColor.time, GetTime() );
|
||||
MemWrite( &item->messageColor.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::MessageLiteralColor );
|
||||
MemWrite( &item->messageColor.time, GetTime() );
|
||||
MemWrite( &item->messageColor.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
|
||||
{
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::MessageAppInfo );
|
||||
MemWrite( &item->message.time, GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)ptr );
|
||||
TracyLfqPrepare( QueueType::PlotConfig );
|
||||
MemWrite( &item->plotConfig.name, (uint64_t)name );
|
||||
MemWrite( &item->plotConfig.type, (uint8_t)type );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAlloc( const void* ptr, size_t size )
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, int callstack )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
MemWrite( &item->messageLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
|
||||
TracyLfqCommit;
|
||||
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
MemWrite( &item->messageColorFat.time, GetTime() );
|
||||
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
MemWrite( &item->messageColorLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyLfqCommit;
|
||||
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
InitRPMallocThread();
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::MessageAppInfo );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
@@ -348,8 +374,9 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFree( const void* ptr )
|
||||
static tracy_force_inline void MemFree( const void* ptr, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
@@ -360,8 +387,9 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
@@ -369,7 +397,7 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
rpmalloc_thread_initialize();
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
@@ -377,12 +405,13 @@ public:
|
||||
SendCallstackMemory( callstack );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
MemAlloc( ptr, size );
|
||||
MemAlloc( ptr, size, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
@@ -390,7 +419,7 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
rpmalloc_thread_initialize();
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
@@ -398,7 +427,7 @@ public:
|
||||
SendCallstackMemory( callstack );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
MemFree( ptr );
|
||||
MemFree( ptr, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -406,27 +435,39 @@ public:
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto ptr = Callstack( depth );
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::Callstack );
|
||||
MemWrite( &item->callstack.ptr, ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqPrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void ParameterRegister( ParameterCallback cb ) { GetProfiler().m_paramCallback = cb; }
|
||||
static tracy_force_inline void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val )
|
||||
{
|
||||
TracyLfqPrepare( QueueType::ParamSetup );
|
||||
tracy::MemWrite( &item->paramSetup.idx, idx );
|
||||
tracy::MemWrite( &item->paramSetup.name, (uint64_t)name );
|
||||
tracy::MemWrite( &item->paramSetup.isBool, (uint8_t)isBool );
|
||||
tracy::MemWrite( &item->paramSetup.val, val );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void SendCallstack( int depth, const char* skipBefore );
|
||||
static void CutCallstack( void* callstack, const char* skipBefore );
|
||||
|
||||
static bool ShouldExit();
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
// Reads the m_isConnected flag with acquire ordering.
tracy_force_inline bool IsConnected() const
{
    const bool connected = m_isConnected.load( std::memory_order_acquire );
    return connected;
}
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
tracy_force_inline uint64_t ConnectionId() const
|
||||
{
|
||||
return m_connectionId.load( std::memory_order_acquire );
|
||||
@@ -444,10 +485,61 @@ public:
|
||||
// Raises both the m_shutdown and the m_shutdownManual flags (relaxed stores).
void RequestShutdown()
{
    const auto order = std::memory_order_relaxed;
    m_shutdown.store( true, order );
    m_shutdownManual.store( true, order );
}
|
||||
// Reports the current value of m_shutdownFinished (relaxed load).
bool HasShutdownFinished() const
{
    const bool finished = m_shutdownFinished.load( std::memory_order_relaxed );
    return finished;
}
|
||||
|
||||
void SendString( uint64_t ptr, const char* str, QueueType type );
|
||||
// Convenience overload for NUL-terminated strings: measures ptr with strlen
// and forwards to the overload that takes an explicit length.
void SendString( uint64_t str, const char* ptr, QueueType type )
{
    const size_t len = strlen( ptr );
    SendString( str, ptr, len, type );
}
|
||||
void SendString( uint64_t str, const char* ptr, size_t len, QueueType type );
|
||||
// Convenience overload for NUL-terminated strings: measures ptr with strlen
// and forwards to the overload that takes an explicit length.
void SendSingleString( const char* ptr )
{
    const size_t len = strlen( ptr );
    SendSingleString( ptr, len );
}
|
||||
void SendSingleString( const char* ptr, size_t len );
|
||||
// Convenience overload for NUL-terminated strings: measures ptr with strlen
// and forwards to the overload that takes an explicit length.
void SendSecondString( const char* ptr )
{
    const size_t len = strlen( ptr );
    SendSecondString( ptr, len );
}
|
||||
void SendSecondString( const char* ptr, size_t len );
|
||||
|
||||
|
||||
// Allocated source location data layout:
|
||||
// 2b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
// 1b null terminator
|
||||
// ssz source file name
|
||||
// 1b null terminator
|
||||
// nsz zone name (optional)
|
||||
|
||||
// Builds a source location record without a zone name; source and function
// must be NUL-terminated. Forwards to the overload taking ( name, nameSz ).
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function )
{
    const char* const noName = nullptr;
    const size_t noNameSz = 0;
    return AllocSourceLocation( line, source, function, noName, noNameSz );
}
|
||||
|
||||
// Builds a source location record from NUL-terminated source and function
// strings: their lengths are taken with strlen and passed on, together with
// the explicitly sized name, to the fully sized overload.
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz )
{
    const size_t sourceLen = strlen( source );
    const size_t functionLen = strlen( function );
    return AllocSourceLocation( line, source, sourceLen, function, functionLen, name, nameSz );
}
|
||||
|
||||
// Builds a source location record from explicitly sized source and function
// strings, with no zone name. Forwards to the fully sized overload.
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz )
{
    const char* const noName = nullptr;
    const size_t noNameSz = 0;
    return AllocSourceLocation( line, source, sourceSz, function, functionSz, noName, noNameSz );
}
|
||||
|
||||
// Allocates (tracy_malloc) and fills one source location record:
//   2 bytes  total record size
//   4 bytes  color, always written as zero here
//   4 bytes  source line
//   functionSz bytes of function name + NUL
//   sourceSz bytes of source file name + NUL
//   nameSz bytes of zone name (only when nameSz != 0, no terminator)
// The total must fit in 16 bits; this is only checked by assert.
// Returns the address of the record as a uint64_t.
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
{
    const auto total = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz );
    assert( total <= std::numeric_limits<uint16_t>::max() );
    const auto recordSz = uint16_t( total );

    auto record = (char*)tracy_malloc( recordSz );
    auto cursor = record;

    // Fixed-size header.
    memcpy( cursor, &recordSz, 2 );
    cursor += 2;
    memset( cursor, 0, 4 );
    cursor += 4;
    memcpy( cursor, &line, 4 );
    cursor += 4;

    // Function name, then source file name, each NUL-terminated.
    memcpy( cursor, function, functionSz );
    cursor += functionSz;
    *cursor++ = '\0';
    memcpy( cursor, source, sourceSz );
    cursor += sourceSz;
    *cursor++ = '\0';

    // Optional zone name fills the rest of the record.
    if( nameSz != 0 )
    {
        memcpy( cursor, name, nameSz );
    }
    return uint64_t( record );
}
|
||||
|
||||
private:
|
||||
enum class DequeueStatus { Success, ConnectionLost, QueueEmpty };
|
||||
enum class DequeueStatus { DataDequeued, ConnectionLost, QueueEmpty };
|
||||
|
||||
// Trampoline with a void* argument: treats ptr as a Profiler* and runs its Worker().
static void LaunchWorker( void* ptr )
{
    auto self = (Profiler*)ptr;
    self->Worker();
}
|
||||
void Worker();
|
||||
@@ -460,9 +552,25 @@ private:
|
||||
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
|
||||
DequeueStatus DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop );
|
||||
DequeueStatus DequeueSerial();
|
||||
bool AppendData( const void* data, size_t len );
|
||||
bool CommitData();
|
||||
bool NeedDataSize( size_t len );
|
||||
|
||||
// Makes room for len bytes via NeedDataSize(), then appends them with
// AppendDataUnsafe(). The data is appended regardless of what NeedDataSize()
// reported; that result is what this function returns.
tracy_force_inline bool AppendData( const void* data, size_t len )
{
    const bool roomOk = NeedDataSize( len );
    AppendDataUnsafe( data, len );
    return roomOk;
}
|
||||
|
||||
// Ensures len more bytes fit within TargetFrameSize counted from m_bufferStart.
// Returns true when they already fit; otherwise returns the result of
// CommitData(). len itself must not exceed TargetFrameSize (assert only).
tracy_force_inline bool NeedDataSize( size_t len )
{
    assert( len <= TargetFrameSize );
    if( m_bufferOffset - m_bufferStart + len <= TargetFrameSize ) return true;
    return CommitData();
}
|
||||
|
||||
tracy_force_inline void AppendDataUnsafe( const void* data, size_t len )
|
||||
{
|
||||
@@ -475,21 +583,27 @@ private:
|
||||
void SendSourceLocation( uint64_t ptr );
|
||||
void SendSourceLocationPayload( uint64_t ptr );
|
||||
void SendCallstackPayload( uint64_t ptr );
|
||||
void SendCallstackPayload64( uint64_t ptr );
|
||||
void SendCallstackAlloc( uint64_t ptr );
|
||||
void SendCallstackFrame( uint64_t ptr );
|
||||
void SendCodeLocation( uint64_t ptr );
|
||||
|
||||
bool HandleServerQuery();
|
||||
void HandleDisconnect();
|
||||
void HandleParameter( uint64_t payload );
|
||||
void HandleSymbolQuery( uint64_t symbol );
|
||||
void HandleSymbolCodeQuery( uint64_t symbol, uint32_t size );
|
||||
|
||||
void CalibrateTimer();
|
||||
void CalibrateDelay();
|
||||
void ReportTopology();
|
||||
|
||||
static tracy_force_inline void SendCallstackMemory( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackMemory );
|
||||
MemWrite( &item->callstackMemory.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
#endif
|
||||
}
|
||||
@@ -511,7 +625,8 @@ private:
|
||||
else
|
||||
{
|
||||
assert( sizeof( size ) == 8 );
|
||||
memcpy( &item->memAlloc.size, &size, 6 );
|
||||
memcpy( &item->memAlloc.size, &size, 4 );
|
||||
memcpy( ((char*)&item->memAlloc.size)+4, ((char*)&size)+4, 2 );
|
||||
}
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
@@ -528,6 +643,10 @@ private:
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
#if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC
|
||||
static int64_t GetTimeQpc();
|
||||
#endif
|
||||
|
||||
double m_timerMul;
|
||||
uint64_t m_resolution;
|
||||
uint64_t m_delay;
|
||||
@@ -540,7 +659,9 @@ private:
|
||||
Socket* m_sock;
|
||||
UdpBroadcast* m_broadcast;
|
||||
bool m_noExit;
|
||||
uint32_t m_userPort;
|
||||
std::atomic<uint32_t> m_zoneId;
|
||||
int64_t m_samplingPeriod;
|
||||
|
||||
uint64_t m_threadCtx;
|
||||
int64_t m_refTimeThread;
|
||||
@@ -553,7 +674,6 @@ private:
|
||||
int m_bufferOffset;
|
||||
int m_bufferStart;
|
||||
|
||||
QueueItem* m_itemBuf;
|
||||
char* m_lz4Buf;
|
||||
|
||||
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
||||
@@ -563,8 +683,8 @@ private:
|
||||
TracyMutex m_fiLock;
|
||||
|
||||
std::atomic<uint64_t> m_frameCount;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<bool> m_isConnected;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<uint64_t> m_connectionId;
|
||||
|
||||
TracyMutex m_deferredLock;
|
||||
@@ -579,8 +699,10 @@ private:
|
||||
#else
|
||||
// Empty stub selected by the #else branch of the surrounding preprocessor
// conditional (its condition is outside this view), so callers can invoke
// ProcessSysTime() unconditionally.
void ProcessSysTime() {}
|
||||
#endif
|
||||
|
||||
ParameterCallback m_paramCallback;
|
||||
};
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
116
client/TracyRingBuffer.hpp
Normal file
@@ -0,0 +1,116 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Reader for an mmap-ed perf event buffer: one perf_event_mmap_page of
// metadata followed by Size bytes of ring data. Owns both the mapping and the
// file descriptor it was constructed from.
//
// Size is the data area size in bytes; it must be a power of two and at least
// one page (checked by assert). A descriptor value of 0 is used internally to
// mean "no descriptor owned".
template<size_t Size>
class RingBuffer
{
public:
    // Maps the buffer belonging to fd. If the mapping fails the descriptor is
    // closed and the object is left invalid (see IsValid()).
    RingBuffer( int fd )
        : m_metadata( nullptr )
        , m_buffer( nullptr )
        , m_mapSize( 0 )
        , m_fd( fd )
    {
        const auto pageSize = uint32_t( getpagesize() );
        assert( Size >= pageSize );
        // Power-of-two check that also works for sizes wider than unsigned int.
        assert( Size != 0 && ( Size & ( Size - 1 ) ) == 0 );
        m_mapSize = Size + pageSize;
        auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
        // mmap reports failure with MAP_FAILED, not with a null pointer.
        if( mapAddr == MAP_FAILED )
        {
            m_fd = 0;
            close( fd );
            return;
        }
        m_metadata = (perf_event_mmap_page*)mapAddr;
        assert( m_metadata->data_offset == pageSize );
        m_buffer = ((char*)mapAddr) + pageSize;
    }

    ~RingBuffer()
    {
        Release();
    }

    RingBuffer( const RingBuffer& ) = delete;
    RingBuffer& operator=( const RingBuffer& ) = delete;

    // Takes over other's mapping and descriptor; other is left invalid and
    // owning nothing.
    RingBuffer( RingBuffer&& other )
        : m_metadata( other.m_metadata )
        , m_buffer( other.m_buffer )
        , m_mapSize( other.m_mapSize )
        , m_fd( other.m_fd )
    {
        other.m_metadata = nullptr;
        other.m_fd = 0;
    }

    // Releases whatever this object currently owns, then takes over other's
    // mapping and descriptor. Self-assignment is a no-op.
    RingBuffer& operator=( RingBuffer&& other )
    {
        if( this != &other )
        {
            Release();
            m_metadata = other.m_metadata;
            m_buffer = other.m_buffer;
            m_mapSize = other.m_mapSize;
            m_fd = other.m_fd;
            other.m_metadata = nullptr;
            other.m_fd = 0;
        }
        return *this;
    }

    // True when the buffer is mapped.
    bool IsValid() const { return m_metadata != nullptr; }

    // Issues PERF_EVENT_IOC_ENABLE on the owned descriptor.
    void Enable()
    {
        ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 );
    }

    // True when the producer's head is ahead of our tail.
    bool HasData() const
    {
        const auto head = LoadHead();
        return head > m_metadata->data_tail;
    }

    // Copies cnt bytes located offset bytes past the current tail into dst,
    // handling wrap-around at the end of the data area. Does not move the tail.
    void Read( void* dst, uint64_t offset, uint64_t cnt )
    {
        auto src = ( m_metadata->data_tail + offset ) % Size;
        if( src + cnt <= Size )
        {
            memcpy( dst, m_buffer + src, cnt );
        }
        else
        {
            // The span wraps: copy up to the end, then the rest from the start.
            const auto s0 = Size - src;
            memcpy( dst, m_buffer + src, s0 );
            memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
        }
    }

    // Marks cnt bytes as consumed by publishing a new tail.
    void Advance( uint64_t cnt )
    {
        StoreTail( m_metadata->data_tail + cnt );
    }

    // Value of the cap_user_time_zero capability bit in the metadata page.
    bool CheckTscCaps() const
    {
        return m_metadata->cap_user_time_zero;
    }

    // Converts timestamp to a TSC value using time_zero, time_mult and
    // time_shift from the metadata page. Requires cap_user_time_zero (assert).
    int64_t ConvertTimeToTsc( int64_t timestamp ) const
    {
        assert( m_metadata->cap_user_time_zero );
        const auto time = timestamp - m_metadata->time_zero;
        const auto quot = time / m_metadata->time_mult;
        const auto rem = time % m_metadata->time_mult;
        return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult;
    }

private:
    // Unmaps the buffer and closes the descriptor, if owned, and marks both as
    // no longer owned so a second call does nothing.
    void Release()
    {
        if( m_metadata ) munmap( m_metadata, m_mapSize );
        if( m_fd ) close( m_fd );
        m_metadata = nullptr;
        m_fd = 0;
    }

    // Acquire-load of data_head, so record bytes are read after the head that
    // published them.
    uint64_t LoadHead() const
    {
        return std::atomic_load_explicit( (const volatile std::atomic<uint64_t>*)&m_metadata->data_head, std::memory_order_acquire );
    }

    // Release-store of data_tail, so our reads complete before the new tail
    // becomes visible.
    void StoreTail( uint64_t tail )
    {
        std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, tail, std::memory_order_release );
    }

    perf_event_mmap_page* m_metadata;   // start of the mapping; nullptr when invalid
    char* m_buffer;                     // data area, one page past m_metadata

    size_t m_mapSize;                   // Size plus one page
    int m_fd;                           // owned descriptor; 0 when none
};
|
||||
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef __TRACYSCOPED_HPP__
|
||||
#define __TRACYSCOPED_HPP__
|
||||
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
@@ -18,39 +19,73 @@ public:
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
, m_connectionId( GetProfiler().ConnectionId() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneBegin );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
, m_connectionId( GetProfiler().ConnectionId() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginCallstack );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneBeginCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
|
||||
GetProfiler().SendCallstack( depth );
|
||||
}
|
||||
|
||||
// Opens a zone whose source location record is allocated at run time from the
// given line, source file, function and (optional) zone name.
// With TRACY_ON_DEMAND the zone is active only if is_active is set and the
// profiler is connected; the connection id is remembered for active zones.
// Inactive zones emit nothing.
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true )
#if defined( TRACY_ON_DEMAND )
    : m_active( is_active && GetProfiler().IsConnected() )
#else
    : m_active( is_active )
#endif
{
    if( m_active )
    {
#if defined( TRACY_ON_DEMAND )
        m_connectionId = GetProfiler().ConnectionId();
#endif
        TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
        const auto allocatedLoc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
        MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
        MemWrite( &item->zoneBegin.srcloc, allocatedLoc );
        TracyLfqCommit;
    }
}
|
||||
|
||||
// Opens a zone whose source location record is allocated at run time and
// follows it with a callstack capture of the requested depth.
// With TRACY_ON_DEMAND the zone is active only if is_active is set and the
// profiler is connected; the connection id is remembered for active zones.
// Inactive zones emit nothing.
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true )
#if defined( TRACY_ON_DEMAND )
    : m_active( is_active && GetProfiler().IsConnected() )
#else
    : m_active( is_active )
#endif
{
    if( m_active )
    {
#if defined( TRACY_ON_DEMAND )
        m_connectionId = GetProfiler().ConnectionId();
#endif
        TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
        const auto allocatedLoc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
        MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
        MemWrite( &item->zoneBegin.srcloc, allocatedLoc );
        TracyLfqCommit;

        // The callstack is sent only after the zone-begin item is committed.
        GetProfiler().SendCallstack( depth );
    }
}
|
||||
@@ -61,49 +96,50 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
||||
TracyLfqPrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Text( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneText );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqPrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
// Attaches a custom name to the zone. The first size bytes of txt are copied
// into a tracy_malloc-ed buffer (no terminator is added) and a ZoneName item
// carrying that buffer's address and the 16-bit size is queued.
// Does nothing for inactive zones or, with TRACY_ON_DEMAND, when the
// profiler's connection id no longer matches the one stored in the zone.
tracy_force_inline void Name( const char* txt, size_t size )
{
    assert( size < std::numeric_limits<uint16_t>::max() );
    if( !m_active ) return;
#if defined( TRACY_ON_DEMAND )
    if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
    auto copy = (char*)tracy_malloc( size );
    memcpy( copy, txt, size );
    const auto size16 = (uint16_t)size;

    TracyLfqPrepare( QueueType::ZoneName );
    MemWrite( &item->zoneTextFat.text, (uint64_t)copy );
    MemWrite( &item->zoneTextFat.size, size16 );
    TracyLfqCommit;
}
|
||||
|
||||
tracy_force_inline void Value( uint64_t value )
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
Magic magic;
|
||||
auto token = GetToken();
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin( magic );
|
||||
MemWrite( &item->hdr.type, QueueType::ZoneName );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
TracyLfqPrepare( QueueType::ZoneValue );
|
||||
MemWrite( &item->zoneValue.value, value );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -5,12 +5,14 @@
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
# elif defined __linux__
|
||||
# include <assert.h>
|
||||
# include <stdio.h>
|
||||
# include <inttypes.h>
|
||||
# elif defined __APPLE__
|
||||
# include <mach/mach_host.h>
|
||||
# include <mach/host_info.h>
|
||||
# elif defined BSD
|
||||
# include <sys/types.h>
|
||||
# include <sys/sysctl.h>
|
||||
# endif
|
||||
|
||||
namespace tracy
|
||||
@@ -45,9 +47,12 @@ void SysTime::ReadTimes()
|
||||
FILE* f = fopen( "/proc/stat", "r" );
|
||||
if( f )
|
||||
{
|
||||
fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
|
||||
int read = fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
|
||||
fclose( f );
|
||||
used = user + nice + system;
|
||||
if (read == 4)
|
||||
{
|
||||
used = user + nice + system;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,6 +67,17 @@ void SysTime::ReadTimes()
|
||||
idle = info.cpu_ticks[CPU_STATE_IDLE];
|
||||
}
|
||||
|
||||
# elif defined BSD
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
u_long data[5];
|
||||
size_t sz = sizeof( data );
|
||||
sysctlbyname( "kern.cp_time", &data, &sz, nullptr, 0 );
|
||||
used = data[0] + data[1] + data[2] + data[3];
|
||||
idle = data[4];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
SysTime::SysTime()
|
||||
@@ -81,7 +97,7 @@ float SysTime::Get()
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
return diffUsed == 0 ? -1 : ( diffUsed - diffIdle ) * 100.f / diffUsed;
|
||||
#elif defined __linux__ || defined __APPLE__
|
||||
#elif defined __linux__ || defined __APPLE__ || defined BSD
|
||||
const auto total = diffUsed + diffIdle;
|
||||
return total == 0 ? -1 : diffUsed * 100.f / total;
|
||||
#endif
|
||||
|
||||
@@ -3,6 +3,12 @@
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__ || defined __linux__ || defined __APPLE__
|
||||
# define TRACY_HAS_SYSTIME
|
||||
#else
|
||||
# include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#ifdef BSD
|
||||
# define TRACY_HAS_SYSTIME
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSTIME
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
bool SysTraceStart();
|
||||
bool SysTraceStart( int64_t& samplingPeriod );
|
||||
void SysTraceStop();
|
||||
void SysTraceWorker( void* ptr );
|
||||
|
||||
|
||||
@@ -1,16 +1,31 @@
|
||||
#ifndef __TRACYTHREAD_HPP__
|
||||
#define __TRACYTHREAD_HPP__
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
# include "tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// Helper whose only job is its destructor: when TRACY_MANUAL_LIFETIME is
// defined, destroying an instance calls rpmalloc_thread_finalize(). Without
// that define the destructor is empty.
class ThreadExitHandler
{
public:
    ~ThreadExitHandler()
    {
#if defined( TRACY_MANUAL_LIFETIME )
        rpmalloc_thread_finalize();
#endif
    }
};
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
|
||||
class Thread
|
||||
{
|
||||
|
||||
@@ -191,7 +191,7 @@ struct ConcurrentQueueDefaultTraits
|
||||
// but many producers, a smaller block size should be favoured. For few producers
|
||||
// and/or many elements, a larger block size is preferred. A sane default
|
||||
// is provided. Must be a power of 2.
|
||||
static const size_t BLOCK_SIZE = 128;
|
||||
static const size_t BLOCK_SIZE = 64*1024;
|
||||
|
||||
// For explicit producers (i.e. when using a producer token), the block is
|
||||
// checked for being empty by iterating through a list of flags, one per element.
|
||||
@@ -243,7 +243,6 @@ struct ProducerToken;
|
||||
struct ConsumerToken;
|
||||
|
||||
template<typename T, typename Traits> class ConcurrentQueue;
|
||||
class ConcurrentQueueTests;
|
||||
|
||||
|
||||
namespace details
|
||||
@@ -413,7 +412,6 @@ struct ProducerToken
|
||||
|
||||
private:
|
||||
template<typename T, typename Traits> friend class ConcurrentQueue;
|
||||
friend class ConcurrentQueueTests;
|
||||
|
||||
protected:
|
||||
details::ConcurrentQueueProducerTypelessBase* producer;
|
||||
@@ -451,7 +449,6 @@ struct ConsumerToken
|
||||
|
||||
private:
|
||||
template<typename T, typename Traits> friend class ConcurrentQueue;
|
||||
friend class ConcurrentQueueTests;
|
||||
|
||||
private: // but shared with ConcurrentQueue
|
||||
std::uint32_t initialOffset;
|
||||
@@ -562,282 +559,18 @@ public:
|
||||
|
||||
// Disable copying and copy assignment
|
||||
ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
|
||||
ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_DELETE_FUNCTION;
|
||||
ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
|
||||
|
||||
// Moving is supported, but note that it is *not* a thread-safe operation.
|
||||
// Nobody can use the queue while it's being moved, and the memory effects
|
||||
// of that move must be propagated to other threads before they can use it.
|
||||
// Note: When a queue is moved, its tokens are still valid but can only be
|
||||
// used with the destination queue (i.e. semantically they are moved along
|
||||
// with the queue itself).
|
||||
// Move constructor: takes over every piece of state from `other`, resets
// `other` to an empty queue, then re-points the adopted producers at this
// queue via reown_producers(). Not thread-safe.
ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
    : producerListTail(other.producerListTail.load(std::memory_order_relaxed)),
    producerCount(other.producerCount.load(std::memory_order_relaxed)),
    initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
    initialBlockPool(other.initialBlockPool),
    initialBlockPoolSize(other.initialBlockPoolSize),
    freeList(std::move(other.freeList)),
    nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
    globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed))
{
    const auto relaxed = std::memory_order_relaxed;

    // Initial block pool no longer belongs to the source.
    other.initialBlockPool = nullptr;
    other.initialBlockPoolSize = 0;
    other.initialBlockPoolIndex.store(0, relaxed);

    // Producer list and consumer bookkeeping start over in the source.
    other.globalExplicitConsumerOffset.store(0, relaxed);
    other.nextExplicitConsumerId.store(0, relaxed);
    other.producerCount.store(0, relaxed);
    other.producerListTail.store(nullptr, relaxed);

    reown_producers();
}
|
||||
|
||||
// Move assignment is implemented as a swap of the two queues' state.
inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
{
    ConcurrentQueue& self = swap_internal(other);
    return self;
}
|
||||
|
||||
// Swaps this queue's state with the other's. Not thread-safe.
|
||||
// Swapping two queues does not invalidate their tokens, however
|
||||
// the tokens that were created for one queue must be used with
|
||||
// only the swapped queue (i.e. the tokens are tied to the
|
||||
// queue's movable state, not the object itself).
|
||||
// Exchanges the state of this queue and `other`; the reference returned by
// swap_internal is not needed here.
inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT
{
    (void)swap_internal(other);
}
|
||||
|
||||
private:
|
||||
// Swaps every state member with `other` and then has both queues re-own the
// producers they ended up with. Swapping a queue with itself does nothing.
// Always returns *this.
ConcurrentQueue& swap_internal(ConcurrentQueue& other)
{
    if (this != &other) {
        // Producer list.
        details::swap_relaxed(producerListTail, other.producerListTail);
        details::swap_relaxed(producerCount, other.producerCount);

        // Initial block pool and free list.
        details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
        std::swap(initialBlockPool, other.initialBlockPool);
        std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
        freeList.swap(other.freeList);

        // Explicit consumer bookkeeping.
        details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
        details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);

        reown_producers();
        other.reown_producers();
    }
    return *this;
}
|
||||
ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_DELETE_FUNCTION;
|
||||
|
||||
public:
|
||||
// Enqueues a single item (by copying it) using an explicit producer token.
|
||||
// Allocates memory if required. Only fails if memory allocation fails (or
|
||||
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
|
||||
// Thread-safe.
|
||||
// Copy-enqueues `item` through the producer identified by `token`; returns
// whatever inner_enqueue reports.
inline bool enqueue(producer_token_t const& token, T const& item)
{
    const bool enqueued = inner_enqueue(token, item);
    return enqueued;
}
|
||||
|
||||
// Enqueues a single item (by moving it, if possible) using an explicit producer token.
|
||||
// Allocates memory if required. Only fails if memory allocation fails (or
|
||||
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
|
||||
// Thread-safe.
|
||||
// Move-enqueues `item` through the producer identified by `token`; returns
// whatever inner_enqueue reports.
inline bool enqueue(producer_token_t const& token, T&& item)
{
    const bool enqueued = inner_enqueue(token, std::move(item));
    return enqueued;
}
|
||||
|
||||
tracy_force_inline T* enqueue_begin(producer_token_t const& token, index_t& currentTailIndex)
|
||||
{
|
||||
return inner_enqueue_begin(token, currentTailIndex);
|
||||
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::enqueue_begin(currentTailIndex);
|
||||
}
|
||||
|
||||
// Enqueues several items using an explicit producer token.
|
||||
// Allocates memory if required. Only fails if memory allocation fails
|
||||
// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
|
||||
// Note: Use std::make_move_iterator if the elements should be moved
|
||||
// instead of copied.
|
||||
// Thread-safe.
|
||||
template<typename It>
|
||||
bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
|
||||
{
|
||||
return inner_enqueue_bulk(token, itemFirst, count);
|
||||
}
|
||||
|
||||
// Attempts to dequeue from the queue.
|
||||
// Returns false if all producer streams appeared empty at the time they
|
||||
// were checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename U>
|
||||
bool try_dequeue(U& item)
|
||||
{
|
||||
// Instead of simply trying each producer in turn (which could cause needless contention on the first
|
||||
// producer), we score them heuristically.
|
||||
size_t nonEmptyCount = 0;
|
||||
ProducerBase* best = nullptr;
|
||||
size_t bestSize = 0;
|
||||
for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
|
||||
auto size = ptr->size_approx();
|
||||
if (size > 0) {
|
||||
if (size > bestSize) {
|
||||
bestSize = size;
|
||||
best = ptr;
|
||||
}
|
||||
++nonEmptyCount;
|
||||
}
|
||||
}
|
||||
|
||||
// If there was at least one non-empty queue but it appears empty at the time
|
||||
// we try to dequeue from it, we need to make sure every queue's been tried
|
||||
if (nonEmptyCount > 0) {
|
||||
if (details::cqLikely(best->dequeue(item))) {
|
||||
return true;
|
||||
}
|
||||
for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
|
||||
if (ptr != best && ptr->dequeue(item)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Attempts to dequeue from the queue.
|
||||
// Returns false if all producer streams appeared empty at the time they
|
||||
// were checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// This differs from the try_dequeue(item) method in that this one does
|
||||
// not attempt to reduce contention by interleaving the order that producer
|
||||
// streams are dequeued from. So, using this method can reduce overall throughput
|
||||
// under contention, but will give more predictable results in single-threaded
|
||||
// consumer scenarios. This is mostly only useful for internal unit tests.
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename U>
|
||||
bool try_dequeue_non_interleaved(U& item)
|
||||
{
|
||||
for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
|
||||
if (ptr->dequeue(item)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Attempts to dequeue from the queue using an explicit consumer token.
|
||||
// Returns false if all producer streams appeared empty at the time they
|
||||
// were checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename U>
|
||||
bool try_dequeue(consumer_token_t& token, U& item)
|
||||
{
|
||||
// The idea is roughly as follows:
|
||||
// Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less
|
||||
// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
|
||||
// If there's no items where you're supposed to be, keep moving until you find a producer with some items
|
||||
// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
|
||||
|
||||
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
|
||||
if (!update_current_producer_after_rotation(token)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// If there was at least one non-empty queue but it appears empty at the time
|
||||
// we try to dequeue from it, we need to make sure every queue's been tried
|
||||
if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
|
||||
if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
|
||||
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
auto tail = producerListTail.load(std::memory_order_acquire);
|
||||
auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
|
||||
if (ptr == nullptr) {
|
||||
ptr = tail;
|
||||
}
|
||||
while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
|
||||
if (ptr->dequeue(item)) {
|
||||
token.currentProducer = ptr;
|
||||
token.itemsConsumedFromCurrent = 1;
|
||||
return true;
|
||||
}
|
||||
ptr = ptr->next_prod();
|
||||
if (ptr == nullptr) {
|
||||
ptr = tail;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Attempts to dequeue several elements from the queue.
|
||||
// Returns the number of items actually dequeued.
|
||||
// Returns 0 if all producer streams appeared empty at the time they
|
||||
// were checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename It>
|
||||
size_t try_dequeue_bulk(It itemFirst, size_t max)
|
||||
{
|
||||
size_t count = 0;
|
||||
for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
|
||||
count += ptr->dequeue_bulk(itemFirst, max - count);
|
||||
if (count == max) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// Attempts to dequeue several elements from the queue using an explicit consumer token.
|
||||
// Returns the number of items actually dequeued.
|
||||
// Returns 0 if all producer streams appeared empty at the time they
|
||||
// were checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename It>
|
||||
size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
|
||||
{
|
||||
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
|
||||
if (!update_current_producer_after_rotation(token)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
|
||||
if (count == max) {
|
||||
if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
|
||||
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
|
||||
max -= count;
|
||||
|
||||
auto tail = producerListTail.load(std::memory_order_acquire);
|
||||
auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
|
||||
if (ptr == nullptr) {
|
||||
ptr = tail;
|
||||
}
|
||||
while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
|
||||
auto dequeued = ptr->dequeue_bulk(itemFirst, max);
|
||||
count += dequeued;
|
||||
if (dequeued != 0) {
|
||||
token.currentProducer = ptr;
|
||||
token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
|
||||
}
|
||||
if (dequeued == max) {
|
||||
break;
|
||||
}
|
||||
max -= dequeued;
|
||||
ptr = ptr->next_prod();
|
||||
if (ptr == nullptr) {
|
||||
ptr = tail;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
template<typename It>
|
||||
size_t try_dequeue_bulk_single(consumer_token_t& token, It itemFirst, size_t max, uint64_t& threadId )
|
||||
template<class NotifyThread, class ProcessData>
|
||||
size_t try_dequeue_bulk_single(consumer_token_t& token, NotifyThread notifyThread, ProcessData processData )
|
||||
{
|
||||
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
|
||||
if (!update_current_producer_after_rotation(token)) {
|
||||
@@ -845,14 +578,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
|
||||
if (count == max) {
|
||||
if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
|
||||
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
threadId = token.currentProducer->threadId;
|
||||
return max;
|
||||
}
|
||||
size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(notifyThread, processData);
|
||||
token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
|
||||
|
||||
auto tail = producerListTail.load(std::memory_order_acquire);
|
||||
@@ -863,9 +589,8 @@ public:
|
||||
if( count == 0 )
|
||||
{
|
||||
while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
|
||||
auto dequeued = ptr->dequeue_bulk(itemFirst, max);
|
||||
auto dequeued = ptr->dequeue_bulk(notifyThread, processData);
|
||||
if (dequeued != 0) {
|
||||
threadId = ptr->threadId;
|
||||
token.currentProducer = ptr;
|
||||
token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
|
||||
return dequeued;
|
||||
@@ -879,38 +604,12 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
threadId = token.currentProducer->threadId;
|
||||
token.currentProducer = ptr;
|
||||
token.itemsConsumedFromCurrent = 0;
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
||||
// Attempts to dequeue from a specific producer's inner queue.
|
||||
// If you happen to know which producer you want to dequeue from, this
|
||||
// is significantly faster than using the general-case try_dequeue methods.
|
||||
// Returns false if the producer's queue appeared empty at the time it
|
||||
// was checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename U>
|
||||
inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item)
|
||||
{
|
||||
return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
|
||||
}
|
||||
|
||||
// Attempts to dequeue several elements from a specific producer's inner queue.
|
||||
// Returns the number of items actually dequeued.
|
||||
// If you happen to know which producer you want to dequeue from, this
|
||||
// is significantly faster than using the general-case try_dequeue methods.
|
||||
// Returns 0 if the producer's queue appeared empty at the time it
|
||||
// was checked (so, the queue is likely but not guaranteed to be empty).
|
||||
// Never allocates. Thread-safe.
|
||||
template<typename It>
|
||||
inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max)
|
||||
{
|
||||
return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
|
||||
}
|
||||
|
||||
|
||||
// Returns an estimate of the total number of elements currently in the queue. This
|
||||
// estimate is only accurate if the queue has completely stabilized before it is called
|
||||
@@ -946,31 +645,12 @@ private:
|
||||
friend struct ProducerToken;
|
||||
friend struct ConsumerToken;
|
||||
friend struct ExplicitProducer;
|
||||
friend class ConcurrentQueueTests;
|
||||
|
||||
|
||||
|
||||
///////////////////////////////
|
||||
// Queue methods
|
||||
///////////////////////////////
|
||||
|
||||
template<typename U>
|
||||
inline bool inner_enqueue(producer_token_t const& token, U&& element)
|
||||
{
|
||||
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::enqueue(std::forward<U>(element));
|
||||
}
|
||||
|
||||
tracy_force_inline T* inner_enqueue_begin(producer_token_t const& token, index_t& currentTailIndex)
{
    // Delegates to the explicit producer's enqueue_begin and returns the
    // pointer it yields; currentTailIndex is passed through by reference.
    auto* target = static_cast<ExplicitProducer*>(token.producer);
    return target->ConcurrentQueue::ExplicitProducer::enqueue_begin(currentTailIndex);
}
|
||||
|
||||
template<typename It>
|
||||
inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
|
||||
{
|
||||
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::enqueue_bulk(itemFirst, count);
|
||||
}
|
||||
|
||||
inline bool update_current_producer_after_rotation(consumer_token_t& token)
|
||||
{
|
||||
// Ah, there's been a rotation, figure out where we should be!
|
||||
@@ -1274,16 +954,10 @@ private:
|
||||
|
||||
// Virtual destructor with an empty body.
virtual ~ProducerBase()
{
}
|
||||
|
||||
template<typename U>
|
||||
inline bool dequeue(U& element)
|
||||
template<class NotifyThread, class ProcessData>
|
||||
inline size_t dequeue_bulk(NotifyThread notifyThread, ProcessData processData)
|
||||
{
|
||||
return static_cast<ExplicitProducer*>(this)->dequeue(element);
|
||||
}
|
||||
|
||||
template<typename It>
|
||||
inline size_t dequeue_bulk(It& itemFirst, size_t max)
|
||||
{
|
||||
return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
|
||||
return static_cast<ExplicitProducer*>(this)->dequeue_bulk(notifyThread, processData);
|
||||
}
|
||||
|
||||
// Returns the `next` link of this producer, viewed as a ProducerBase.
inline ProducerBase* next_prod() const
{
    return static_cast<ProducerBase*>(next);
}
|
||||
@@ -1398,106 +1072,6 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
template<typename U>
|
||||
inline bool enqueue(U&& element)
|
||||
{
|
||||
index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
|
||||
index_t newTailIndex = 1 + currentTailIndex;
|
||||
if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
|
||||
// We reached the end of a block, start a new one
|
||||
auto startBlock = this->tailBlock;
|
||||
auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
|
||||
if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::is_empty()) {
|
||||
// We can re-use the block ahead of us, it's empty!
|
||||
this->tailBlock = this->tailBlock->next;
|
||||
this->tailBlock->ConcurrentQueue::Block::reset_empty();
|
||||
|
||||
// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
|
||||
// last block from it first -- except instead of removing then adding, we can just overwrite).
|
||||
// Note that there must be a valid block index here, since even if allocation failed in the ctor,
|
||||
// it would have been re-attempted when adding the first block to the queue; since there is such
|
||||
// a block, a block index must have been successfully allocated.
|
||||
}
|
||||
else {
|
||||
// Whatever head value we see here is >= the last value we saw here (relatively),
|
||||
// and <= its current value. Since we have the most recent tail, the head must be
|
||||
// <= to it.
|
||||
auto head = this->headIndex.load(std::memory_order_relaxed);
|
||||
assert(!details::circular_less_than<index_t>(currentTailIndex, head));
|
||||
if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
|
||||
|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
|
||||
// We can't enqueue in another block because there's not enough leeway -- the
|
||||
// tail could surpass the head by the time the block fills up! (Or we'll exceed
|
||||
// the size limit, if the second part of the condition was true.)
|
||||
return false;
|
||||
}
|
||||
// We're going to need a new block; check that the block index has room
|
||||
if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) {
|
||||
// Hmm, the circular block index is already full -- we'll need
|
||||
// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
|
||||
// the initial allocation failed in the constructor.
|
||||
|
||||
if (!new_block_index(pr_blockIndexSlotsUsed)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Insert a new block in the circular linked list
|
||||
auto newBlock = this->parent->ConcurrentQueue::requisition_block();
|
||||
if (newBlock == nullptr) {
|
||||
return false;
|
||||
}
|
||||
newBlock->ConcurrentQueue::Block::reset_empty();
|
||||
if (this->tailBlock == nullptr) {
|
||||
newBlock->next = newBlock;
|
||||
}
|
||||
else {
|
||||
newBlock->next = this->tailBlock->next;
|
||||
this->tailBlock->next = newBlock;
|
||||
}
|
||||
this->tailBlock = newBlock;
|
||||
++pr_blockIndexSlotsUsed;
|
||||
}
|
||||
|
||||
if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
|
||||
// The constructor may throw. We want the element not to appear in the queue in
|
||||
// that case (without corrupting the queue):
|
||||
MOODYCAMEL_TRY {
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
|
||||
}
|
||||
MOODYCAMEL_CATCH (...) {
|
||||
// Revert change to the current block, but leave the new block available
|
||||
// for next time
|
||||
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
|
||||
this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock;
|
||||
MOODYCAMEL_RETHROW;
|
||||
}
|
||||
}
|
||||
else {
|
||||
(void)startBlock;
|
||||
(void)originalBlockIndexSlotsUsed;
|
||||
}
|
||||
|
||||
// Add block to block index
|
||||
auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
|
||||
entry.base = currentTailIndex;
|
||||
entry.block = this->tailBlock;
|
||||
blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
|
||||
pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
|
||||
|
||||
if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
|
||||
this->tailIndex.store(newTailIndex, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueue
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
|
||||
|
||||
this->tailIndex.store(newTailIndex, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
||||
inline void enqueue_begin_alloc(index_t currentTailIndex)
|
||||
{
|
||||
// We reached the end of a block, start a new one
|
||||
@@ -1556,296 +1130,15 @@ private:
|
||||
{
|
||||
return this->tailIndex;
|
||||
}
|
||||
|
||||
template<typename U>
|
||||
bool dequeue(U& element)
|
||||
{
|
||||
auto tail = this->tailIndex.load(std::memory_order_relaxed);
|
||||
auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
|
||||
if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
|
||||
// Might be something to dequeue, let's give it a try
|
||||
|
||||
// Note that this if is purely for performance purposes in the common case when the queue is
|
||||
// empty and the values are eventually consistent -- we may enter here spuriously.
|
||||
|
||||
// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
|
||||
// change them) and must be the same value at this point (inside the if) as when the if condition was
|
||||
// evaluated.
|
||||
|
||||
// We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
|
||||
// This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in
|
||||
// the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
|
||||
// Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
|
||||
// read-modify-write operations are guaranteed to work on the latest value in the modification order), but
|
||||
// unfortunately that can't be shown to be correct using only the C++11 standard.
|
||||
// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
|
||||
// Increment optimistic counter, then check if it went over the boundary
|
||||
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
|
||||
// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
|
||||
// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
|
||||
// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
|
||||
assert(overcommit <= myDequeueCount);
|
||||
|
||||
// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
|
||||
// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
|
||||
// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
|
||||
tail = this->tailIndex.load(std::memory_order_acquire);
|
||||
if (details::cqLikely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
|
||||
// Guaranteed to be at least one element to dequeue!
|
||||
|
||||
// Get the index. Note that since there's guaranteed to be at least one element, this
|
||||
// will never exceed tail. We need to do an acquire-release fence here since it's possible
|
||||
// that whatever condition got us to this point was for an earlier enqueued element (that
|
||||
// we already see the memory effects for), but that by the time we increment somebody else
|
||||
// has incremented it, and we need to see the memory effects for *that* element, which is
|
||||
// in such a case is necessarily visible on the thread that incremented it in the first
|
||||
// place with the more current condition (they must have acquired a tail that is at least
|
||||
// as recent).
|
||||
auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
|
||||
|
||||
|
||||
// Determine which block the element is in
|
||||
|
||||
auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
|
||||
auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
|
||||
|
||||
// We need to be careful here about subtracting and dividing because of index wrap-around.
|
||||
// When an index wraps, we need to preserve the sign of the offset when dividing it by the
|
||||
// block size (in order to get a correct signed block count offset in all cases):
|
||||
auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
|
||||
auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
|
||||
auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
|
||||
auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
|
||||
|
||||
// Dequeue
|
||||
auto& el = *((*block)[index]);
|
||||
if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
|
||||
// Make sure the element is still fully dequeued and destroyed even if the assignment
|
||||
// throws
|
||||
struct Guard {
|
||||
Block* block;
|
||||
index_t index;
|
||||
|
||||
~Guard()
|
||||
{
|
||||
(*block)[index]->~T();
|
||||
block->ConcurrentQueue::Block::set_empty(index);
|
||||
}
|
||||
} guard = { block, index };
|
||||
|
||||
element = std::move(el);
|
||||
}
|
||||
else {
|
||||
element = std::move(el);
|
||||
el.~T();
|
||||
block->ConcurrentQueue::Block::set_empty(index);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
|
||||
this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename It>
|
||||
bool enqueue_bulk(It itemFirst, size_t count)
|
||||
{
|
||||
// First, we need to make sure we have enough room to enqueue all of the elements;
|
||||
// this means pre-allocating blocks and putting them in the block index (but only if
|
||||
// all the allocations succeeded).
|
||||
index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
|
||||
auto startBlock = this->tailBlock;
|
||||
auto originalBlockIndexFront = pr_blockIndexFront;
|
||||
auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
|
||||
|
||||
Block* firstAllocatedBlock = nullptr;
|
||||
|
||||
// Figure out how many blocks we'll need to allocate, and do so
|
||||
size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
|
||||
index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
|
||||
if (blockBaseDiff > 0) {
|
||||
// Allocate as many blocks as possible from ahead
|
||||
while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::is_empty()) {
|
||||
blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
|
||||
currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
|
||||
|
||||
this->tailBlock = this->tailBlock->next;
|
||||
firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
|
||||
|
||||
auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
|
||||
entry.base = currentTailIndex;
|
||||
entry.block = this->tailBlock;
|
||||
pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
|
||||
}
|
||||
|
||||
// Now allocate as many blocks as necessary from the block pool
|
||||
while (blockBaseDiff > 0) {
|
||||
blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
|
||||
currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
|
||||
|
||||
auto head = this->headIndex.load(std::memory_order_relaxed);
|
||||
assert(!details::circular_less_than<index_t>(currentTailIndex, head));
|
||||
bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
|
||||
if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
|
||||
if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
|
||||
// Failed to allocate, undo changes (but keep injected blocks)
|
||||
pr_blockIndexFront = originalBlockIndexFront;
|
||||
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
|
||||
this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
|
||||
return false;
|
||||
}
|
||||
|
||||
// pr_blockIndexFront is updated inside new_block_index, so we need to
|
||||
// update our fallback value too (since we keep the new index even if we
|
||||
// later fail)
|
||||
originalBlockIndexFront = originalBlockIndexSlotsUsed;
|
||||
}
|
||||
|
||||
// Insert a new block in the circular linked list
|
||||
auto newBlock = this->parent->ConcurrentQueue::requisition_block();
|
||||
if (newBlock == nullptr) {
|
||||
pr_blockIndexFront = originalBlockIndexFront;
|
||||
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
|
||||
this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
|
||||
return false;
|
||||
}
|
||||
|
||||
newBlock->ConcurrentQueue::Block::set_all_empty();
|
||||
if (this->tailBlock == nullptr) {
|
||||
newBlock->next = newBlock;
|
||||
}
|
||||
else {
|
||||
newBlock->next = this->tailBlock->next;
|
||||
this->tailBlock->next = newBlock;
|
||||
}
|
||||
this->tailBlock = newBlock;
|
||||
firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
|
||||
|
||||
++pr_blockIndexSlotsUsed;
|
||||
|
||||
auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
|
||||
entry.base = currentTailIndex;
|
||||
entry.block = this->tailBlock;
|
||||
pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
|
||||
}
|
||||
|
||||
// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
|
||||
// publish the new block index front
|
||||
auto block = firstAllocatedBlock;
|
||||
while (true) {
|
||||
block->ConcurrentQueue::Block::reset_empty();
|
||||
if (block == this->tailBlock) {
|
||||
break;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
|
||||
if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
|
||||
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueue, one block at a time
|
||||
index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
|
||||
currentTailIndex = startTailIndex;
|
||||
auto endBlock = this->tailBlock;
|
||||
this->tailBlock = startBlock;
|
||||
assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
|
||||
if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
|
||||
this->tailBlock = firstAllocatedBlock;
|
||||
}
|
||||
while (true) {
|
||||
auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
|
||||
if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
|
||||
stopIndex = newTailIndex;
|
||||
}
|
||||
if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
|
||||
while (currentTailIndex != stopIndex) {
|
||||
new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
|
||||
}
|
||||
}
|
||||
else {
|
||||
MOODYCAMEL_TRY {
|
||||
while (currentTailIndex != stopIndex) {
|
||||
// Must use copy constructor even if move constructor is available
|
||||
// because we may have to revert if there's an exception.
|
||||
// Sorry about the horrible templated next line, but it was the only way
|
||||
// to disable moving *at compile time*, which is important because a type
|
||||
// may only define a (noexcept) move constructor, and so calls to the
|
||||
// cctor will not compile, even if they are in an if branch that will never
|
||||
// be executed
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
|
||||
++currentTailIndex;
|
||||
++itemFirst;
|
||||
}
|
||||
}
|
||||
MOODYCAMEL_CATCH (...) {
|
||||
// Oh dear, an exception's been thrown -- destroy the elements that
|
||||
// were enqueued so far and revert the entire bulk operation (we'll keep
|
||||
// any allocated blocks in our linked list for later, though).
|
||||
auto constructedStopIndex = currentTailIndex;
|
||||
auto lastBlockEnqueued = this->tailBlock;
|
||||
|
||||
pr_blockIndexFront = originalBlockIndexFront;
|
||||
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
|
||||
this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
|
||||
|
||||
if (!details::is_trivially_destructible<T>::value) {
|
||||
auto block = startBlock;
|
||||
if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
|
||||
block = firstAllocatedBlock;
|
||||
}
|
||||
currentTailIndex = startTailIndex;
|
||||
while (true) {
|
||||
stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
|
||||
if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
|
||||
stopIndex = constructedStopIndex;
|
||||
}
|
||||
while (currentTailIndex != stopIndex) {
|
||||
(*block)[currentTailIndex++]->~T();
|
||||
}
|
||||
if (block == lastBlockEnqueued) {
|
||||
break;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
}
|
||||
MOODYCAMEL_RETHROW;
|
||||
}
|
||||
}
|
||||
|
||||
if (this->tailBlock == endBlock) {
|
||||
assert(currentTailIndex == newTailIndex);
|
||||
break;
|
||||
}
|
||||
this->tailBlock = this->tailBlock->next;
|
||||
}
|
||||
|
||||
if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) {
|
||||
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
|
||||
}
|
||||
|
||||
this->tailIndex.store(newTailIndex, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename It>
|
||||
size_t dequeue_bulk(It& itemFirst, size_t max)
|
||||
template<class NotifyThread, class ProcessData>
|
||||
size_t dequeue_bulk(NotifyThread notifyThread, ProcessData processData)
|
||||
{
|
||||
auto tail = this->tailIndex.load(std::memory_order_relaxed);
|
||||
auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
|
||||
auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
|
||||
if (details::circular_less_than<size_t>(0, desiredCount)) {
|
||||
desiredCount = desiredCount < max ? desiredCount : max;
|
||||
desiredCount = desiredCount < 8192 ? desiredCount : 8192;
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
|
||||
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
|
||||
@@ -1871,7 +1164,9 @@ private:
|
||||
auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
|
||||
auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
|
||||
auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
|
||||
|
||||
|
||||
notifyThread( this->threadId );
|
||||
|
||||
// Iterate the blocks and dequeue
|
||||
auto index = firstIndex;
|
||||
do {
|
||||
@@ -1880,10 +1175,9 @@ private:
|
||||
endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
|
||||
auto block = localBlockIndex->entries[indexIndex].block;
|
||||
|
||||
const auto sz = endIndex - index;
|
||||
memcpy( itemFirst, (*block)[index], sizeof( T ) * sz );
|
||||
index += sz;
|
||||
itemFirst += sz;
|
||||
const auto sz = endIndex - index;
|
||||
processData( (*block)[index], sz );
|
||||
index += sz;
|
||||
|
||||
block->ConcurrentQueue::Block::set_many_empty(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
|
||||
indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
|
||||
@@ -2125,14 +1419,14 @@ private:
|
||||
static inline U* create()
{
    // Allocate raw storage through the traits' malloc and default-construct
    // a U in it. Yields nullptr when the allocation fails, so no object is
    // ever constructed at a null address. (A second, unreachable
    // `return new (p) U;` used to follow the checked return; it was dead
    // code and has been dropped.)
    auto p = (Traits::malloc)(sizeof(U));
    return p != nullptr ? new (p) U : nullptr;
}
|
||||
|
||||
template<typename U, typename A1>
|
||||
static inline U* create(A1&& a1)
|
||||
{
|
||||
auto p = (Traits::malloc)(sizeof(U));
|
||||
return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
|
||||
return new (p) U(std::forward<A1>(a1));
|
||||
}
|
||||
|
||||
template<typename U>
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson / Rampant Pixels
|
||||
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson
|
||||
*
|
||||
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
|
||||
* The latest source code is always available at
|
||||
*
|
||||
* https://github.com/rampantpixels/rpmalloc
|
||||
* https://github.com/mjansson/rpmalloc
|
||||
*
|
||||
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
|
||||
*
|
||||
@@ -12,53 +12,113 @@
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
# define RPMALLOC_ATTRIBUTE __attribute__((__malloc__))
|
||||
# define RPMALLOC_RESTRICT
|
||||
# define RPMALLOC_EXPORT __attribute__((visibility("default")))
|
||||
# define RPMALLOC_ALLOCATOR
|
||||
# define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
|
||||
# if defined(__clang_major__) && (__clang_major__ < 4)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
|
||||
# else
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
|
||||
# endif
|
||||
# define RPMALLOC_CDECL
|
||||
#elif defined(_MSC_VER)
|
||||
# define RPMALLOC_ATTRIBUTE
|
||||
# define RPMALLOC_RESTRICT __declspec(restrict)
|
||||
# define RPMALLOC_EXPORT
|
||||
# define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
|
||||
# define RPMALLOC_ATTRIB_MALLOC
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
|
||||
# define RPMALLOC_CDECL __cdecl
|
||||
#else
|
||||
# define RPMALLOC_ATTRIBUTE
|
||||
# define RPMALLOC_RESTRICT
|
||||
# define RPMALLOC_EXPORT
|
||||
# define RPMALLOC_ALLOCATOR
|
||||
# define RPMALLOC_ATTRIB_MALLOC
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
|
||||
# define RPMALLOC_CDECL
|
||||
#endif
|
||||
|
||||
//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes
|
||||
#ifndef RPMALLOC_CONFIGURABLE
|
||||
#define RPMALLOC_CONFIGURABLE 0
|
||||
#endif
|
||||
|
||||
//! Flag to rpaligned_realloc to not preserve content in reallocation
|
||||
#define RPMALLOC_NO_PRESERVE 1
|
||||
|
||||
typedef struct rpmalloc_global_statistics_t {
|
||||
//! Current amount of virtual memory mapped (only if ENABLE_STATISTICS=1)
|
||||
//! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped;
|
||||
//! Current amount of memory in global caches for small and medium sizes (<64KiB)
|
||||
//! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped_peak;
|
||||
//! Current amount of memory in global caches for small and medium sizes (<32KiB)
|
||||
size_t cached;
|
||||
//! Total amount of memory mapped (only if ENABLE_STATISTICS=1)
|
||||
//! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
|
||||
size_t huge_alloc;
|
||||
//! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
|
||||
size_t huge_alloc_peak;
|
||||
//! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped_total;
|
||||
//! Total amount of memory unmapped (only if ENABLE_STATISTICS=1)
|
||||
//! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
|
||||
size_t unmapped_total;
|
||||
} rpmalloc_global_statistics_t;
|
||||
|
||||
typedef struct rpmalloc_thread_statistics_t {
|
||||
//! Current number of bytes available for allocation from active spans
|
||||
size_t active;
|
||||
//! Current number of bytes available in thread size class caches
|
||||
//! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
|
||||
size_t sizecache;
|
||||
//! Current number of bytes available in thread span caches
|
||||
//! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
|
||||
size_t spancache;
|
||||
//! Current number of bytes in pending deferred deallocations
|
||||
size_t deferred;
|
||||
//! Total number of bytes transitioned from thread cache to global cache
|
||||
//! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
|
||||
size_t thread_to_global;
|
||||
//! Total number of bytes transitioned from global cache to thread cache
|
||||
//! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
|
||||
size_t global_to_thread;
|
||||
//! Per span count statistics (only if ENABLE_STATISTICS=1)
|
||||
struct {
|
||||
//! Currently used number of spans
|
||||
size_t current;
|
||||
//! High water mark of spans used
|
||||
size_t peak;
|
||||
//! Number of spans transitioned to global cache
|
||||
size_t to_global;
|
||||
//! Number of spans transitioned from global cache
|
||||
size_t from_global;
|
||||
//! Number of spans transitioned to thread cache
|
||||
size_t to_cache;
|
||||
//! Number of spans transitioned from thread cache
|
||||
size_t from_cache;
|
||||
//! Number of spans transitioned to reserved state
|
||||
size_t to_reserved;
|
||||
//! Number of spans transitioned from reserved state
|
||||
size_t from_reserved;
|
||||
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
|
||||
size_t map_calls;
|
||||
} span_use[32];
|
||||
//! Per size class statistics (only if ENABLE_STATISTICS=1)
|
||||
struct {
|
||||
//! Current number of allocations
|
||||
size_t alloc_current;
|
||||
//! Peak number of allocations
|
||||
size_t alloc_peak;
|
||||
//! Total number of allocations
|
||||
size_t alloc_total;
|
||||
//! Total number of frees
|
||||
size_t free_total;
|
||||
//! Number of spans transitioned to cache
|
||||
size_t spans_to_cache;
|
||||
//! Number of spans transitioned from cache
|
||||
size_t spans_from_cache;
|
||||
//! Number of spans transitioned from reserved state
|
||||
size_t spans_from_reserved;
|
||||
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
|
||||
size_t map_calls;
|
||||
} size_use[128];
|
||||
} rpmalloc_thread_statistics_t;
|
||||
|
||||
typedef struct rpmalloc_config_t {
|
||||
@@ -69,85 +129,133 @@ typedef struct rpmalloc_config_t {
|
||||
// actual start of the memory region due to this alignment. The alignment offset
|
||||
// will be passed to the memory unmap function. The alignment offset MUST NOT be
|
||||
// larger than 65535 (storable in an uint16_t), if it is you must use natural
|
||||
// alignment to shift it into 16 bits.
|
||||
// alignment to shift it into 16 bits. If you set a memory_map function, you
|
||||
// must also set a memory_unmap function or else the default implementation will
|
||||
// be used for both.
|
||||
void* (*memory_map)(size_t size, size_t* offset);
|
||||
//! Unmap the memory pages starting at address and spanning the given number of bytes.
|
||||
// If release is set to 1, the unmap is for an entire span range as returned by
|
||||
// a previous call to memory_map and that the entire range should be released.
|
||||
// If release is set to 0, the unmap is a partial decommit of a subset of the mapped
|
||||
// memory range.
|
||||
void (*memory_unmap)(void* address, size_t size, size_t offset, int release);
|
||||
//! Size of memory pages. The page size MUST be a power of two in [512,16384] range
|
||||
// (2^9 to 2^14) unless 0 - set to 0 to use system page size. All memory mapping
|
||||
// If release is set to non-zero, the unmap is for an entire span range as returned by
|
||||
// a previous call to memory_map and that the entire range should be released. The
|
||||
// release argument holds the size of the entire span range. If release is set to 0,
|
||||
// the unmap is a partial decommit of a subset of the mapped memory range.
|
||||
// If you set a memory_unmap function, you must also set a memory_map function or
|
||||
// else the default implementation will be used for both.
|
||||
void (*memory_unmap)(void* address, size_t size, size_t offset, size_t release);
|
||||
//! Size of memory pages. The page size MUST be a power of two. All memory mapping
|
||||
// requests to memory_map will be made with size set to a multiple of the page size.
|
||||
// Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
|
||||
size_t page_size;
|
||||
//! Size of a span of memory pages. MUST be a multiple of page size, and in [4096,262144]
|
||||
// range (unless 0 - set to 0 to use the default span size).
|
||||
//! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144]
|
||||
// range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE
|
||||
// is defined to 1.
|
||||
size_t span_size;
|
||||
//! Number of spans to map at each request to map new virtual memory blocks. This can
|
||||
// be used to minimize the system call overhead at the cost of virtual memory address
|
||||
// space. The extra mapped pages will not be written until actually used, so physical
|
||||
// committed memory should not be affected in the default implementation.
|
||||
// committed memory should not be affected in the default implementation. Will be
|
||||
// aligned to a multiple of spans that match memory page size in case of huge pages.
|
||||
size_t span_map_count;
|
||||
//! Debug callback if memory guards are enabled. Called if a memory overwrite is detected
|
||||
void (*memory_overwrite)(void* address);
|
||||
//! Enable use of large/huge pages. If this flag is set to non-zero and page size is
|
||||
// zero, the allocator will try to enable huge pages and auto detect the configuration.
|
||||
// If this is set to non-zero and page_size is also non-zero, the allocator will
|
||||
// assume huge pages have been configured and enabled prior to initializing the
|
||||
// allocator.
|
||||
// For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
|
||||
// For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
|
||||
int enable_huge_pages;
|
||||
} rpmalloc_config_t;
|
||||
|
||||
extern int
|
||||
//! Initialize allocator with default configuration
|
||||
RPMALLOC_EXPORT int
|
||||
rpmalloc_initialize(void);
|
||||
|
||||
extern int
|
||||
//! Initialize allocator with given configuration
|
||||
RPMALLOC_EXPORT int
|
||||
rpmalloc_initialize_config(const rpmalloc_config_t* config);
|
||||
|
||||
extern const rpmalloc_config_t*
|
||||
//! Get allocator configuration
|
||||
RPMALLOC_EXPORT const rpmalloc_config_t*
|
||||
rpmalloc_config(void);
|
||||
|
||||
extern void
|
||||
//! Finalize allocator
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_finalize(void);
|
||||
|
||||
void
|
||||
//! Initialize allocator for calling thread
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_thread_initialize(void);
|
||||
|
||||
extern void
|
||||
//! Finalize allocator for calling thread
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_thread_finalize(void);
|
||||
|
||||
extern void
|
||||
//! Perform deferred deallocations pending for the calling thread heap
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_thread_collect(void);
|
||||
|
||||
extern int
|
||||
//! Query if allocator is initialized for calling thread
|
||||
RPMALLOC_EXPORT int
|
||||
rpmalloc_is_thread_initialized(void);
|
||||
|
||||
extern void
|
||||
//! Get per-thread statistics
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
|
||||
|
||||
extern void
|
||||
//! Get global statistics
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
|
||||
|
||||
TRACY_API RPMALLOC_RESTRICT void*
|
||||
rpmalloc(size_t size) RPMALLOC_ATTRIBUTE;
|
||||
//! Dump all statistics in human readable format to file (should be a FILE*)
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_dump_statistics(void* file);
|
||||
|
||||
//! Allocate a memory block of at least the given size
|
||||
TRACY_API RPMALLOC_ALLOCATOR void*
|
||||
rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
|
||||
|
||||
//! Free the given memory block
|
||||
TRACY_API void
|
||||
rpfree(void* ptr);
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
//! Allocate a memory block of at least the given size and zero initialize it
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
|
||||
|
||||
extern void*
|
||||
rprealloc(void* ptr, size_t size);
|
||||
//! Reallocate the given block to at least the given size
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
|
||||
extern void*
|
||||
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags);
|
||||
//! Reallocate the given block to at least the given size and alignment,
|
||||
// with optional control flags (see RPMALLOC_NO_PRESERVE).
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
|
||||
extern int
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT int
|
||||
rpposix_memalign(void **memptr, size_t alignment, size_t size);
|
||||
|
||||
extern size_t
|
||||
//! Query the usable size of the given memory block (from given pointer to the end of block)
|
||||
RPMALLOC_EXPORT size_t
|
||||
rpmalloc_usable_size(void* ptr);
|
||||
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __TRACYALLOC_HPP__
|
||||
#define __TRACYALLOC_HPP__
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include "../client/tracy_rpmalloc.hpp"
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
#ifndef __TRACYAPI_H__
|
||||
#define __TRACYAPI_H__
|
||||
|
||||
#ifdef _WIN32
|
||||
# if defined TRACY_IMPORTS
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# if defined TRACY_EXPORTS
|
||||
# define TRACY_API __declspec(dllexport)
|
||||
# elif defined TRACY_IMPORTS
|
||||
# define TRACY_API __declspec(dllimport)
|
||||
# else
|
||||
# define TRACY_API __declspec(dllexport)
|
||||
# define TRACY_API
|
||||
# endif
|
||||
#else
|
||||
# define TRACY_API __attribute__((visibility("default")))
|
||||
|
||||
@@ -10,15 +10,6 @@ namespace tracy
|
||||
using TracyMutex = std::shared_mutex;
|
||||
}
|
||||
|
||||
#elif defined __CYGWIN__
|
||||
|
||||
#include "tracy_benaphore.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
using TracyMutex = NonRecursiveBenaphore;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <mutex>
|
||||
|
||||
@@ -4,18 +4,18 @@
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/tracy_lz4.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 21 };
|
||||
enum : uint32_t { BroadcastVersion = 0 };
|
||||
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 40 };
|
||||
enum : uint32_t { BroadcastVersion = 1 };
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
|
||||
enum { TargetFrameSize = 256 * 1024 };
|
||||
enum { LZ4Size = LZ4_COMPRESSBOUND( TargetFrameSize ) };
|
||||
enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) };
|
||||
static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
|
||||
static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
|
||||
|
||||
@@ -36,6 +36,7 @@ enum { WelcomeMessageHostInfoSize = 1024 };
|
||||
|
||||
#pragma pack( 1 )
|
||||
|
||||
// Must increase left query space after handling!
|
||||
enum ServerQuery : uint8_t
|
||||
{
|
||||
ServerQueryTerminate,
|
||||
@@ -46,18 +47,33 @@ enum ServerQuery : uint8_t
|
||||
ServerQueryCallstackFrame,
|
||||
ServerQueryFrameName,
|
||||
ServerQueryDisconnect,
|
||||
ServerQueryExternalName
|
||||
ServerQueryExternalName,
|
||||
ServerQueryParameter,
|
||||
ServerQuerySymbol,
|
||||
ServerQuerySymbolCode,
|
||||
ServerQueryCodeLocation
|
||||
};
|
||||
|
||||
struct ServerQueryPacket
|
||||
{
|
||||
ServerQuery type;
|
||||
uint64_t ptr;
|
||||
uint32_t extra;
|
||||
};
|
||||
|
||||
enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) };
|
||||
|
||||
|
||||
enum CpuArchitecture : uint8_t
|
||||
{
|
||||
CpuArchUnknown,
|
||||
CpuArchX86,
|
||||
CpuArchX64,
|
||||
CpuArchArm32,
|
||||
CpuArchArm64
|
||||
};
|
||||
|
||||
|
||||
struct WelcomeMessage
|
||||
{
|
||||
double timerMul;
|
||||
@@ -67,8 +83,13 @@ struct WelcomeMessage
|
||||
uint64_t resolution;
|
||||
uint64_t epoch;
|
||||
uint64_t pid;
|
||||
int64_t samplingPeriod;
|
||||
uint8_t onDemand;
|
||||
uint8_t isApple;
|
||||
uint8_t cpuArch;
|
||||
uint8_t codeTransfer;
|
||||
char cpuManufacturer[12];
|
||||
uint32_t cpuId;
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
char hostInfo[WelcomeMessageHostInfoSize];
|
||||
};
|
||||
@@ -89,6 +110,7 @@ struct BroadcastMessage
|
||||
{
|
||||
uint32_t broadcastVersion;
|
||||
uint32_t protocolVersion;
|
||||
uint32_t listenPort;
|
||||
uint32_t activeTime; // in seconds
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
};
|
||||
|
||||
@@ -12,12 +12,15 @@ enum class QueueType : uint8_t
|
||||
ZoneName,
|
||||
Message,
|
||||
MessageColor,
|
||||
MessageCallstack,
|
||||
MessageColorCallstack,
|
||||
MessageAppInfo,
|
||||
ZoneBeginAllocSrcLoc,
|
||||
ZoneBeginAllocSrcLocCallstack,
|
||||
CallstackMemory,
|
||||
Callstack,
|
||||
CallstackAlloc,
|
||||
CallstackSample,
|
||||
FrameImage,
|
||||
ZoneBegin,
|
||||
ZoneBeginCallstack,
|
||||
@@ -28,6 +31,7 @@ enum class QueueType : uint8_t
|
||||
LockSharedWait,
|
||||
LockSharedObtain,
|
||||
LockSharedRelease,
|
||||
LockName,
|
||||
MemAlloc,
|
||||
MemFree,
|
||||
MemAllocCallstack,
|
||||
@@ -45,9 +49,11 @@ enum class QueueType : uint8_t
|
||||
Terminate,
|
||||
KeepAlive,
|
||||
ThreadContext,
|
||||
GpuCalibration,
|
||||
Crash,
|
||||
CrashReport,
|
||||
ZoneValidation,
|
||||
ZoneValue,
|
||||
FrameMarkMsg,
|
||||
FrameMarkMsgStart,
|
||||
FrameMarkMsgEnd,
|
||||
@@ -57,14 +63,23 @@ enum class QueueType : uint8_t
|
||||
LockMark,
|
||||
MessageLiteral,
|
||||
MessageLiteralColor,
|
||||
MessageLiteralCallstack,
|
||||
MessageLiteralColorCallstack,
|
||||
GpuNewContext,
|
||||
CallstackFrameSize,
|
||||
CallstackFrame,
|
||||
SymbolInformation,
|
||||
CodeInformation,
|
||||
SysTimeReport,
|
||||
TidToPid,
|
||||
PlotConfig,
|
||||
ParamSetup,
|
||||
ParamPingback,
|
||||
CpuTopology,
|
||||
SingleStringData,
|
||||
SecondStringData,
|
||||
StringData,
|
||||
ThreadName,
|
||||
CustomStringData,
|
||||
PlotName,
|
||||
SourceLocationPayload,
|
||||
CallstackPayload,
|
||||
@@ -73,6 +88,7 @@ enum class QueueType : uint8_t
|
||||
FrameImageData,
|
||||
ExternalName,
|
||||
ExternalThreadName,
|
||||
SymbolCode,
|
||||
NUM_TYPES
|
||||
};
|
||||
|
||||
@@ -83,9 +99,13 @@ struct QueueThreadContext
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneBegin
|
||||
struct QueueZoneBeginLean
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueZoneBegin : public QueueZoneBeginLean
|
||||
{
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
@@ -99,6 +119,11 @@ struct QueueZoneValidation
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct QueueZoneValue
|
||||
{
|
||||
uint64_t value;
|
||||
};
|
||||
|
||||
struct QueueStringTransfer
|
||||
{
|
||||
uint64_t ptr;
|
||||
@@ -112,13 +137,17 @@ struct QueueFrameMark
|
||||
|
||||
struct QueueFrameImage
|
||||
{
|
||||
uint64_t image; // ptr
|
||||
uint64_t frame;
|
||||
uint32_t frame;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
uint8_t flip;
|
||||
};
|
||||
|
||||
struct QueueFrameImageFat : public QueueFrameImage
|
||||
{
|
||||
uint64_t image; // ptr
|
||||
};
|
||||
|
||||
struct QueueSourceLocation
|
||||
{
|
||||
uint64_t name;
|
||||
@@ -130,9 +159,10 @@ struct QueueSourceLocation
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueZoneText
|
||||
struct QueueZoneTextFat
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
enum class LockType : uint8_t
|
||||
@@ -153,7 +183,6 @@ struct QueueLockTerminate
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockWait
|
||||
@@ -161,7 +190,6 @@ struct QueueLockWait
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockObtain
|
||||
@@ -185,6 +213,17 @@ struct QueueLockMark
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
struct QueueLockName
|
||||
{
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct QueueLockNameFat : public QueueLockName
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
enum class PlotDataType : uint8_t
|
||||
{
|
||||
Float,
|
||||
@@ -208,7 +247,6 @@ struct QueuePlotData
|
||||
struct QueueMessage
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageColor : public QueueMessage
|
||||
@@ -218,6 +256,43 @@ struct QueueMessageColor : public QueueMessage
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueMessageLiteral : public QueueMessage
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageColorLiteral : public QueueMessageColor
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageFat : public QueueMessage
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMessageColorFat : public QueueMessageColor
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
// Don't change order, only add new entries at the end, this is also used on trace dumps!
|
||||
enum class GpuContextType : uint8_t
|
||||
{
|
||||
Invalid,
|
||||
OpenGl,
|
||||
Vulkan,
|
||||
OpenCL,
|
||||
Direct3D12
|
||||
};
|
||||
|
||||
enum GpuContextFlags : uint8_t
|
||||
{
|
||||
GpuContextCalibration = 1 << 0
|
||||
};
|
||||
|
||||
struct QueueGpuNewContext
|
||||
{
|
||||
int64_t cpuTime;
|
||||
@@ -225,7 +300,8 @@ struct QueueGpuNewContext
|
||||
uint64_t thread;
|
||||
float period;
|
||||
uint8_t context;
|
||||
uint8_t accuracyBits;
|
||||
GpuContextFlags flags;
|
||||
GpuContextType type;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneBegin
|
||||
@@ -252,6 +328,14 @@ struct QueueGpuTime
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuCalibration
|
||||
{
|
||||
int64_t gpuTime;
|
||||
int64_t cpuTime;
|
||||
int64_t cpuDelta;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueMemAlloc
|
||||
{
|
||||
int64_t time;
|
||||
@@ -267,22 +351,28 @@ struct QueueMemFree
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackMemory
|
||||
struct QueueCallstackFat
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstack
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackAlloc
|
||||
struct QueueCallstackAllocFat
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint64_t nativePtr;
|
||||
};
|
||||
|
||||
struct QueueCallstackSample
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueCallstackSampleFat : public QueueCallstackSample
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackFrameSize
|
||||
{
|
||||
uint64_t ptr;
|
||||
@@ -291,8 +381,20 @@ struct QueueCallstackFrameSize
|
||||
|
||||
struct QueueCallstackFrame
|
||||
{
|
||||
uint64_t name;
|
||||
uint64_t file;
|
||||
uint32_t line;
|
||||
uint64_t symAddr;
|
||||
uint32_t symLen;
|
||||
};
|
||||
|
||||
struct QueueSymbolInformation
|
||||
{
|
||||
uint32_t line;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct QueueCodeInformation
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint32_t line;
|
||||
};
|
||||
|
||||
@@ -330,6 +432,34 @@ struct QueueTidToPid
|
||||
uint64_t pid;
|
||||
};
|
||||
|
||||
enum class PlotFormatType : uint8_t
|
||||
{
|
||||
Number,
|
||||
Memory,
|
||||
Percentage
|
||||
};
|
||||
|
||||
struct QueuePlotConfig
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
uint8_t type;
|
||||
};
|
||||
|
||||
struct QueueParamSetup
|
||||
{
|
||||
uint32_t idx;
|
||||
uint64_t name; // ptr
|
||||
uint8_t isBool;
|
||||
int32_t val;
|
||||
};
|
||||
|
||||
struct QueueCpuTopology
|
||||
{
|
||||
uint32_t package;
|
||||
uint32_t core;
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueHeader
|
||||
{
|
||||
union
|
||||
@@ -346,38 +476,54 @@ struct QueueItem
|
||||
{
|
||||
QueueThreadContext threadCtx;
|
||||
QueueZoneBegin zoneBegin;
|
||||
QueueZoneBeginLean zoneBeginLean;
|
||||
QueueZoneEnd zoneEnd;
|
||||
QueueZoneValidation zoneValidation;
|
||||
QueueZoneValue zoneValue;
|
||||
QueueStringTransfer stringTransfer;
|
||||
QueueFrameMark frameMark;
|
||||
QueueFrameImage frameImage;
|
||||
QueueFrameImageFat frameImageFat;
|
||||
QueueSourceLocation srcloc;
|
||||
QueueZoneText zoneText;
|
||||
QueueZoneTextFat zoneTextFat;
|
||||
QueueLockAnnounce lockAnnounce;
|
||||
QueueLockTerminate lockTerminate;
|
||||
QueueLockWait lockWait;
|
||||
QueueLockObtain lockObtain;
|
||||
QueueLockRelease lockRelease;
|
||||
QueueLockMark lockMark;
|
||||
QueueLockName lockName;
|
||||
QueueLockNameFat lockNameFat;
|
||||
QueuePlotData plotData;
|
||||
QueueMessage message;
|
||||
QueueMessageColor messageColor;
|
||||
QueueMessageLiteral messageLiteral;
|
||||
QueueMessageColorLiteral messageColorLiteral;
|
||||
QueueMessageFat messageFat;
|
||||
QueueMessageColorFat messageColorFat;
|
||||
QueueGpuNewContext gpuNewContext;
|
||||
QueueGpuZoneBegin gpuZoneBegin;
|
||||
QueueGpuZoneEnd gpuZoneEnd;
|
||||
QueueGpuTime gpuTime;
|
||||
QueueGpuCalibration gpuCalibration;
|
||||
QueueMemAlloc memAlloc;
|
||||
QueueMemFree memFree;
|
||||
QueueCallstackMemory callstackMemory;
|
||||
QueueCallstack callstack;
|
||||
QueueCallstackAlloc callstackAlloc;
|
||||
QueueCallstackFat callstackFat;
|
||||
QueueCallstackAllocFat callstackAllocFat;
|
||||
QueueCallstackSample callstackSample;
|
||||
QueueCallstackSampleFat callstackSampleFat;
|
||||
QueueCallstackFrameSize callstackFrameSize;
|
||||
QueueCallstackFrame callstackFrame;
|
||||
QueueSymbolInformation symbolInformation;
|
||||
QueueCodeInformation codeInformation;
|
||||
QueueCrashReport crashReport;
|
||||
QueueSysTime sysTime;
|
||||
QueueContextSwitch contextSwitch;
|
||||
QueueThreadWakeup threadWakeup;
|
||||
QueueTidToPid tidToPid;
|
||||
QueuePlotConfig plotConfig;
|
||||
QueueParamSetup paramSetup;
|
||||
QueueCpuTopology cpuTopology;
|
||||
};
|
||||
};
|
||||
#pragma pack()
|
||||
@@ -385,17 +531,20 @@ struct QueueItem
|
||||
|
||||
enum { QueueItemSize = sizeof( QueueItem ) };
|
||||
|
||||
static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name
|
||||
static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ), // zone text
|
||||
sizeof( QueueHeader ), // zone name
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstack ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
|
||||
sizeof( QueueHeader ), // callstack memory
|
||||
sizeof( QueueHeader ), // callstack
|
||||
sizeof( QueueHeader ), // callstack alloc
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
|
||||
@@ -406,6 +555,7 @@ static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockName ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
|
||||
@@ -424,9 +574,11 @@ static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ), // terminate
|
||||
sizeof( QueueHeader ), // keep alive
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
|
||||
sizeof( QueueHeader ), // crash
|
||||
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValue ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end
|
||||
@@ -434,17 +586,26 @@ static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCodeInformation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSysTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
|
||||
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
|
||||
sizeof( QueueHeader ), // param pingback
|
||||
sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
|
||||
sizeof( QueueHeader ), // single string data
|
||||
sizeof( QueueHeader ), // second string data
|
||||
// keep all QueueStringTransfer below
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload
|
||||
@@ -453,6 +614,7 @@ static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code
|
||||
};
|
||||
|
||||
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
||||
@@ -460,6 +622,6 @@ static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType:
|
||||
static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
|
||||
static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" );
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <new>
|
||||
#include <stdio.h>
|
||||
@@ -23,6 +22,10 @@
|
||||
#else
|
||||
# include <arpa/inet.h>
|
||||
# include <sys/socket.h>
|
||||
# include <sys/param.h>
|
||||
# include <errno.h>
|
||||
# include <fcntl.h>
|
||||
# include <netinet/in.h>
|
||||
# include <netdb.h>
|
||||
# include <unistd.h>
|
||||
# include <poll.h>
|
||||
@@ -61,11 +64,15 @@ void InitWinSock()
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
enum { BufSize = 128 * 1024 };
|
||||
|
||||
Socket::Socket()
|
||||
: m_buf( (char*)tracy_malloc( BufSize ) )
|
||||
, m_bufPtr( nullptr )
|
||||
, m_sock( -1 )
|
||||
, m_bufLeft( 0 )
|
||||
, m_ptr( nullptr )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InitWinSock();
|
||||
@@ -77,21 +84,72 @@ Socket::Socket( int sock )
|
||||
, m_bufPtr( nullptr )
|
||||
, m_sock( sock )
|
||||
, m_bufLeft( 0 )
|
||||
, m_ptr( nullptr )
|
||||
{
|
||||
}
|
||||
|
||||
Socket::~Socket()
|
||||
{
|
||||
tracy_free( m_buf );
|
||||
if( m_sock != -1 )
|
||||
if( m_sock.load( std::memory_order_relaxed ) != -1 )
|
||||
{
|
||||
Close();
|
||||
}
|
||||
if( m_ptr )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_connSock );
|
||||
#else
|
||||
close( m_connSock );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
bool Socket::Connect( const char* addr, int port )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
assert( !IsValid() );
|
||||
|
||||
if( m_ptr )
|
||||
{
|
||||
const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
|
||||
if( c == -1 )
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
const auto err = WSAGetLastError();
|
||||
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
|
||||
if( err != WSAEISCONN )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
closesocket( m_connSock );
|
||||
m_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
const auto err = errno;
|
||||
if( err == EALREADY || err == EINPROGRESS ) return false;
|
||||
if( err != EISCONN )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
close( m_connSock );
|
||||
m_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
u_long nonblocking = 0;
|
||||
ioctlsocket( m_connSock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( m_connSock, F_GETFL, 0 );
|
||||
fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK );
|
||||
#endif
|
||||
m_sock.store( m_connSock, std::memory_order_relaxed );
|
||||
freeaddrinfo( m_res );
|
||||
m_ptr = nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
@@ -112,43 +170,75 @@ bool Socket::Connect( const char* addr, int port )
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
u_long nonblocking = 1;
|
||||
ioctlsocket( sock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
close( sock );
|
||||
int flags = fcntl( sock, F_GETFL, 0 );
|
||||
fcntl( sock, F_SETFL, flags | O_NONBLOCK );
|
||||
#endif
|
||||
continue;
|
||||
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 )
|
||||
{
|
||||
break;
|
||||
}
|
||||
break;
|
||||
else
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
const auto err = WSAGetLastError();
|
||||
if( err != WSAEWOULDBLOCK )
|
||||
{
|
||||
closesocket( sock );
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
if( errno != EINPROGRESS )
|
||||
{
|
||||
close( sock );
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
m_res = res;
|
||||
m_ptr = ptr;
|
||||
m_connSock = sock;
|
||||
return false;
|
||||
}
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
m_sock = sock;
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
u_long nonblocking = 0;
|
||||
ioctlsocket( sock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( sock, F_GETFL, 0 );
|
||||
fcntl( sock, F_SETFL, flags & ~O_NONBLOCK );
|
||||
#endif
|
||||
|
||||
m_sock.store( sock, std::memory_order_relaxed );
|
||||
return true;
|
||||
}
|
||||
|
||||
void Socket::Close()
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
assert( sock != -1 );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_sock );
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( m_sock );
|
||||
close( sock );
|
||||
#endif
|
||||
m_sock = -1;
|
||||
m_sock.store( -1, std::memory_order_relaxed );
|
||||
}
|
||||
|
||||
int Socket::Send( const void* _buf, int len )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (const char*)_buf;
|
||||
assert( m_sock != -1 );
|
||||
assert( sock != -1 );
|
||||
auto start = buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
auto ret = send( m_sock, buf, len, MSG_NOSIGNAL );
|
||||
auto ret = send( sock, buf, len, MSG_NOSIGNAL );
|
||||
if( ret == -1 ) return -1;
|
||||
len -= ret;
|
||||
buf += ret;
|
||||
@@ -158,13 +248,14 @@ int Socket::Send( const void* _buf, int len )
|
||||
|
||||
int Socket::GetSendBufSize()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
int bufSize;
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
int sz = sizeof( bufSize );
|
||||
getsockopt( m_sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
|
||||
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
|
||||
#else
|
||||
socklen_t sz = sizeof( bufSize );
|
||||
getsockopt( m_sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
|
||||
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
|
||||
#endif
|
||||
return bufSize;
|
||||
}
|
||||
@@ -192,7 +283,7 @@ int Socket::RecvBuffered( void* buf, int len, int timeout )
|
||||
m_bufLeft = Recv( m_buf, BufSize, timeout );
|
||||
if( m_bufLeft <= 0 ) return m_bufLeft;
|
||||
|
||||
const auto sz = std::min( len, m_bufLeft );
|
||||
const auto sz = len < m_bufLeft ? len : m_bufLeft;
|
||||
memcpy( buf, m_buf, sz );
|
||||
m_bufPtr = m_buf + sz;
|
||||
m_bufLeft -= sz;
|
||||
@@ -201,15 +292,16 @@ int Socket::RecvBuffered( void* buf, int len, int timeout )
|
||||
|
||||
int Socket::Recv( void* _buf, int len, int timeout )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.fd = (socket_t)sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
if( poll( &fd, 1, timeout ) > 0 )
|
||||
{
|
||||
return recv( m_sock, buf, len, 0 );
|
||||
return recv( sock, buf, len, 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -217,33 +309,36 @@ int Socket::Recv( void* _buf, int len, int timeout )
|
||||
}
|
||||
}
|
||||
|
||||
bool Socket::Read( void* _buf, int len, int timeout, std::function<bool()> exitCb )
|
||||
bool Socket::Read( void* buf, int len, int timeout )
|
||||
{
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
auto cbuf = (char*)buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
if( exitCb() ) return false;
|
||||
const auto sz = RecvBuffered( buf, len, timeout );
|
||||
switch( sz )
|
||||
{
|
||||
case 0:
|
||||
return false;
|
||||
case -1:
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto err = WSAGetLastError();
|
||||
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
break;
|
||||
}
|
||||
if( !ReadImpl( cbuf, len, timeout ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::ReadImpl( char*& buf, int& len, int timeout )
|
||||
{
|
||||
const auto sz = RecvBuffered( buf, len, timeout );
|
||||
switch( sz )
|
||||
{
|
||||
case 0:
|
||||
return false;
|
||||
case -1:
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto err = WSAGetLastError();
|
||||
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -262,15 +357,21 @@ bool Socket::ReadRaw( void* _buf, int len, int timeout )
|
||||
|
||||
bool Socket::HasData()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
if( m_bufLeft > 0 ) return true;
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.fd = (socket_t)sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
return poll( &fd, 1, 0 ) > 0;
|
||||
}
|
||||
|
||||
bool Socket::IsValid() const
|
||||
{
|
||||
return m_sock.load( std::memory_order_relaxed ) >= 0;
|
||||
}
|
||||
|
||||
|
||||
ListenSocket::ListenSocket()
|
||||
: m_sock( -1 )
|
||||
@@ -295,7 +396,9 @@ bool ListenSocket::Listen( int port, int backlog )
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_INET6;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
#ifndef TRACY_ONLY_LOCALHOST
|
||||
hints.ai_flags = AI_PASSIVE;
|
||||
#endif
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%i", port );
|
||||
@@ -303,15 +406,29 @@ bool ListenSocket::Listen( int port, int backlog )
|
||||
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
|
||||
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
|
||||
if (m_sock == -1)
|
||||
{
|
||||
// IPV6 protocol may not be available/is disabled. Try to create a socket
|
||||
// with the IPV4 protocol
|
||||
hints.ai_family = AF_INET;
|
||||
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
|
||||
if( m_sock == -1 ) return false;
|
||||
}
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
unsigned long val = 0;
|
||||
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
|
||||
#elif defined BSD
|
||||
int val = 0;
|
||||
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
|
||||
val = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
|
||||
#else
|
||||
int val = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
|
||||
#endif
|
||||
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); return false; }
|
||||
if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); return false; }
|
||||
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
|
||||
if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
|
||||
freeaddrinfo( res );
|
||||
return true;
|
||||
}
|
||||
@@ -449,8 +566,10 @@ IpAddress::~IpAddress()
|
||||
|
||||
void IpAddress::Set( const struct sockaddr& addr )
|
||||
{
|
||||
#if __MINGW32__
|
||||
auto ai = (struct sockaddr_in*)&addr;
|
||||
#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
|
||||
struct sockaddr_in tmp;
|
||||
memcpy( &tmp, &addr, sizeof( tmp ) );
|
||||
auto ai = &tmp;
|
||||
#else
|
||||
auto ai = (const struct sockaddr_in*)&addr;
|
||||
#endif
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
#ifndef __TRACYSOCKET_HPP__
|
||||
#define __TRACYSOCKET_HPP__
|
||||
|
||||
#include <functional>
|
||||
#include <atomic>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "TracyForceInline.hpp"
|
||||
|
||||
struct addrinfo;
|
||||
struct sockaddr;
|
||||
|
||||
namespace tracy
|
||||
@@ -14,8 +18,6 @@ void InitWinSock();
|
||||
|
||||
class Socket
|
||||
{
|
||||
enum { BufSize = 128 * 1024 };
|
||||
|
||||
public:
|
||||
Socket();
|
||||
Socket( int sock );
|
||||
@@ -27,9 +29,23 @@ public:
|
||||
int Send( const void* buf, int len );
|
||||
int GetSendBufSize();
|
||||
|
||||
bool Read( void* buf, int len, int timeout, std::function<bool()> exitCb );
|
||||
bool Read( void* buf, int len, int timeout );
|
||||
|
||||
template<typename ShouldExit>
|
||||
bool Read( void* buf, int len, int timeout, ShouldExit exitCb )
|
||||
{
|
||||
auto cbuf = (char*)buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
if( exitCb() ) return false;
|
||||
if( !ReadImpl( cbuf, len, timeout ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ReadRaw( void* buf, int len, int timeout );
|
||||
bool HasData();
|
||||
bool IsValid() const;
|
||||
|
||||
Socket( const Socket& ) = delete;
|
||||
Socket( Socket&& ) = delete;
|
||||
@@ -40,10 +56,16 @@ private:
|
||||
int RecvBuffered( void* buf, int len, int timeout );
|
||||
int Recv( void* buf, int len, int timeout );
|
||||
|
||||
bool ReadImpl( char*& buf, int& len, int timeout );
|
||||
|
||||
char* m_buf;
|
||||
char* m_bufPtr;
|
||||
int m_sock;
|
||||
std::atomic<int> m_sock;
|
||||
int m_bufLeft;
|
||||
|
||||
struct addrinfo *m_res;
|
||||
struct addrinfo *m_ptr;
|
||||
int m_connSock;
|
||||
};
|
||||
|
||||
class ListenSocket
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable:4996)
|
||||
#endif
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
#else
|
||||
@@ -15,10 +18,16 @@
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
# ifndef __ANDROID__
|
||||
# include <syscall.h>
|
||||
# ifdef __ANDROID__
|
||||
# include <sys/types.h>
|
||||
# else
|
||||
# include <sys/syscall.h>
|
||||
# endif
|
||||
# include <fcntl.h>
|
||||
#elif defined __FreeBSD__
|
||||
# include <sys/thr.h>
|
||||
#elif defined __NetBSD__ || defined __DragonFly__
|
||||
# include <sys/lwp.h>
|
||||
#endif
|
||||
|
||||
#ifdef __MINGW32__
|
||||
@@ -30,6 +39,11 @@
|
||||
|
||||
#include "TracySystem.hpp"
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR );
|
||||
extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include <atomic>
|
||||
# include "TracyAlloc.hpp"
|
||||
@@ -38,6 +52,41 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
TRACY_API uint64_t GetThreadHandleImpl()
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( GetCurrentThreadId() );
|
||||
#elif defined __APPLE__
|
||||
uint64_t id;
|
||||
pthread_threadid_np( pthread_self(), &id );
|
||||
return id;
|
||||
#elif defined __ANDROID__
|
||||
return (uint64_t)gettid();
|
||||
#elif defined __linux__
|
||||
return (uint64_t)syscall( SYS_gettid );
|
||||
#elif defined __FreeBSD__
|
||||
long id;
|
||||
thr_self( &id );
|
||||
return id;
|
||||
#elif defined __NetBSD__
|
||||
return _lwp_self();
|
||||
#elif defined __DragonFly__
|
||||
return lwp_gettid();
|
||||
#elif defined __OpenBSD__
|
||||
return getthrid();
|
||||
#else
|
||||
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( pthread_self() );
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
struct ThreadNameData
|
||||
{
|
||||
@@ -45,44 +94,50 @@ struct ThreadNameData
|
||||
const char* name;
|
||||
ThreadNameData* next;
|
||||
};
|
||||
TRACY_API std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||
std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
#endif
|
||||
|
||||
void SetThreadName( const char* name )
|
||||
TRACY_API void SetThreadName( const char* name )
|
||||
{
|
||||
#if defined _WIN32 && !defined PTW32_VERSION && !defined __WINPTHREADS_VERSION
|
||||
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
|
||||
wchar_t buf[256];
|
||||
mbstowcs( buf, name, 256 );
|
||||
SetThreadDescription( GetCurrentThread(), buf );
|
||||
# else
|
||||
const DWORD MS_VC_EXCEPTION=0x406D1388;
|
||||
# pragma pack( push, 8 )
|
||||
struct THREADNAME_INFO
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
|
||||
if( _SetThreadDescription )
|
||||
{
|
||||
DWORD dwType;
|
||||
LPCSTR szName;
|
||||
DWORD dwThreadID;
|
||||
DWORD dwFlags;
|
||||
};
|
||||
wchar_t buf[256];
|
||||
mbstowcs( buf, name, 256 );
|
||||
_SetThreadDescription( GetCurrentThread(), buf );
|
||||
}
|
||||
else
|
||||
{
|
||||
# if defined _MSC_VER
|
||||
const DWORD MS_VC_EXCEPTION=0x406D1388;
|
||||
# pragma pack( push, 8 )
|
||||
struct THREADNAME_INFO
|
||||
{
|
||||
DWORD dwType;
|
||||
LPCSTR szName;
|
||||
DWORD dwThreadID;
|
||||
DWORD dwFlags;
|
||||
};
|
||||
# pragma pack(pop)
|
||||
|
||||
DWORD ThreadId = GetCurrentThreadId();
|
||||
THREADNAME_INFO info;
|
||||
info.dwType = 0x1000;
|
||||
info.szName = name;
|
||||
info.dwThreadID = ThreadId;
|
||||
info.dwFlags = 0;
|
||||
DWORD ThreadId = GetCurrentThreadId();
|
||||
THREADNAME_INFO info;
|
||||
info.dwType = 0x1000;
|
||||
info.szName = name;
|
||||
info.dwThreadID = ThreadId;
|
||||
info.dwFlags = 0;
|
||||
|
||||
__try
|
||||
{
|
||||
RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
|
||||
}
|
||||
__except(EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
}
|
||||
__try
|
||||
{
|
||||
RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
|
||||
}
|
||||
__except(EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
}
|
||||
# endif
|
||||
}
|
||||
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__ && !defined __CYGWIN__
|
||||
{
|
||||
const auto sz = strlen( name );
|
||||
@@ -105,7 +160,7 @@ void SetThreadName( const char* name )
|
||||
const auto sz = strlen( name );
|
||||
char* buf = (char*)tracy_malloc( sz+1 );
|
||||
memcpy( buf, name, sz );
|
||||
buf[sz+1] = '\0';
|
||||
buf[sz] = '\0';
|
||||
auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) );
|
||||
data->id = detail::GetThreadHandleImpl();
|
||||
data->name = buf;
|
||||
@@ -115,7 +170,7 @@ void SetThreadName( const char* name )
|
||||
#endif
|
||||
}
|
||||
|
||||
const char* GetThreadName( uint64_t id )
|
||||
TRACY_API const char* GetThreadName( uint64_t id )
|
||||
{
|
||||
static char buf[256];
|
||||
#ifdef TRACY_ENABLE
|
||||
@@ -129,26 +184,23 @@ const char* GetThreadName( uint64_t id )
|
||||
ptr = ptr->next;
|
||||
}
|
||||
#else
|
||||
# ifdef _WIN32
|
||||
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
|
||||
if( _GetThreadDescription )
|
||||
{
|
||||
PWSTR tmp;
|
||||
GetThreadDescription( hnd, &tmp );
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
if( ret != 0 )
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
{
|
||||
return buf;
|
||||
PWSTR tmp;
|
||||
_GetThreadDescription( hnd, &tmp );
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
if( ret != 0 )
|
||||
{
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
# endif
|
||||
# elif defined __GLIBC__ && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined __CYGWIN__
|
||||
if( pthread_getname_np( (pthread_t)id, buf, 256 ) == 0 )
|
||||
{
|
||||
return buf;
|
||||
}
|
||||
# elif defined __linux__
|
||||
int cs, fd;
|
||||
char path[32];
|
||||
@@ -185,3 +237,13 @@ const char* GetThreadName( uint64_t id )
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); }
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,23 +1,6 @@
|
||||
#ifndef __TRACYSYSTEM_HPP__
|
||||
#define __TRACYSYSTEM_HPP__
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# ifndef _WINDOWS_
|
||||
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
|
||||
# endif
|
||||
#else
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
# include <unistd.h>
|
||||
# ifdef __ANDROID__
|
||||
# include <sys/types.h>
|
||||
# else
|
||||
# include <sys/syscall.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "TracyApi.h"
|
||||
@@ -27,24 +10,7 @@ namespace tracy
|
||||
|
||||
namespace detail
|
||||
{
|
||||
static inline uint64_t GetThreadHandleImpl()
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( GetCurrentThreadId() );
|
||||
#elif defined __APPLE__
|
||||
uint64_t id;
|
||||
pthread_threadid_np( pthread_self(), &id );
|
||||
return id;
|
||||
#elif defined __ANDROID__
|
||||
return (uint64_t)gettid();
|
||||
#elif defined __linux__
|
||||
return (uint64_t)syscall( SYS_gettid );
|
||||
#else
|
||||
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( pthread_self() );
|
||||
#endif
|
||||
}
|
||||
TRACY_API uint64_t GetThreadHandleImpl();
|
||||
}
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
@@ -56,8 +22,8 @@ static inline uint64_t GetThreadHandle()
|
||||
}
|
||||
#endif
|
||||
|
||||
void SetThreadName( const char* name );
|
||||
const char* GetThreadName( uint64_t id );
|
||||
TRACY_API void SetThreadName( const char* name );
|
||||
TRACY_API const char* GetThreadName( uint64_t id );
|
||||
|
||||
}
|
||||
|
||||
|
||||
18
common/src-from-vcxproj.mk
Normal file
@@ -0,0 +1,18 @@
|
||||
# Extract the actual list of source files from a sibling Visual Studio project.
|
||||
|
||||
# Ensure these are simply-substituted variables, without changing their values.
|
||||
SRC := $(SRC)
|
||||
SRC2 := $(SRC2)
|
||||
SRC3 := $(SRC3)
|
||||
|
||||
# Paths here are relative to the directory in which make was invoked, not to
|
||||
# this file, so ../win32/$(PROJECT).vcxproj refers to the Visual Studio project
|
||||
# of whichever tool is including this makefile fragment.
|
||||
|
||||
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
|
||||
# The tool-specific makefile may request that certain files be omitted.
|
||||
SRC += $(filter-out $(FILTER),$(BASE))
|
||||
SRC2 += $(filter-out $(FILTER),$(BASE2))
|
||||
SRC3 += $(filter-out $(FILTER),$(BASE3))
|
||||
@@ -1,68 +0,0 @@
|
||||
// Copyright (c) 2015 Jeff Preshing
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef __TRACY_CPP11OM_BENAPHORE_H__
|
||||
#define __TRACY_CPP11OM_BENAPHORE_H__
|
||||
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include "tracy_sema.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class NonRecursiveBenaphore
|
||||
{
|
||||
private:
|
||||
std::atomic<int> m_contentionCount;
|
||||
DefaultSemaphoreType m_sema;
|
||||
|
||||
public:
|
||||
NonRecursiveBenaphore() : m_contentionCount(0) {}
|
||||
|
||||
void lock()
|
||||
{
|
||||
if (m_contentionCount.fetch_add(1, std::memory_order_acquire) > 0)
|
||||
{
|
||||
m_sema.wait();
|
||||
}
|
||||
}
|
||||
|
||||
bool try_lock()
|
||||
{
|
||||
if (m_contentionCount.load(std::memory_order_relaxed) != 0)
|
||||
return false;
|
||||
int expected = 0;
|
||||
return m_contentionCount.compare_exchange_strong(expected, 1, std::memory_order_acquire);
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
int oldCount = m_contentionCount.fetch_sub(1, std::memory_order_release);
|
||||
assert(oldCount > 0);
|
||||
if (oldCount > 1)
|
||||
{
|
||||
m_sema.signal();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // __CPP11OM_BENAPHORE_H__
|
||||
@@ -231,12 +231,12 @@ static unsigned LZ4_isLittleEndian(void)
|
||||
#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
|
||||
/* lie to the compiler about data alignment; use with caution */
|
||||
|
||||
static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
|
||||
static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
|
||||
static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
|
||||
LZ4_FORCE_INLINE U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
|
||||
LZ4_FORCE_INLINE U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
|
||||
LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
|
||||
|
||||
static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
|
||||
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
|
||||
LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
|
||||
LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
|
||||
|
||||
#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
|
||||
|
||||
@@ -244,36 +244,36 @@ static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
|
||||
/* currently only defined for gcc and icc */
|
||||
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
|
||||
|
||||
static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
|
||||
static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
|
||||
static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
|
||||
LZ4_FORCE_INLINE U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
|
||||
LZ4_FORCE_INLINE U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
|
||||
LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
|
||||
|
||||
static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
|
||||
static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
|
||||
LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
|
||||
LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
|
||||
|
||||
#else /* safe and portable access using memcpy() */
|
||||
|
||||
static U16 LZ4_read16(const void* memPtr)
|
||||
LZ4_FORCE_INLINE U16 LZ4_read16(const void* memPtr)
|
||||
{
|
||||
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
static U32 LZ4_read32(const void* memPtr)
|
||||
LZ4_FORCE_INLINE U32 LZ4_read32(const void* memPtr)
|
||||
{
|
||||
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
static reg_t LZ4_read_ARCH(const void* memPtr)
|
||||
LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr)
|
||||
{
|
||||
reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
static void LZ4_write16(void* memPtr, U16 value)
|
||||
LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
static void LZ4_write32(void* memPtr, U32 value)
|
||||
LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
@@ -281,7 +281,7 @@ static void LZ4_write32(void* memPtr, U32 value)
|
||||
#endif /* LZ4_FORCE_MEMORY_ACCESS */
|
||||
|
||||
|
||||
static U16 LZ4_readLE16(const void* memPtr)
|
||||
LZ4_FORCE_INLINE U16 LZ4_readLE16(const void* memPtr)
|
||||
{
|
||||
if (LZ4_isLittleEndian()) {
|
||||
return LZ4_read16(memPtr);
|
||||
@@ -291,7 +291,7 @@ static U16 LZ4_readLE16(const void* memPtr)
|
||||
}
|
||||
}
|
||||
|
||||
static void LZ4_writeLE16(void* memPtr, U16 value)
|
||||
LZ4_FORCE_INLINE void LZ4_writeLE16(void* memPtr, U16 value)
|
||||
{
|
||||
if (LZ4_isLittleEndian()) {
|
||||
LZ4_write16(memPtr, value);
|
||||
@@ -318,7 +318,7 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
|
||||
|
||||
|
||||
#ifndef LZ4_FAST_DEC_LOOP
|
||||
# if defined(__i386__) || defined(__x86_64__)
|
||||
# if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
||||
# define LZ4_FAST_DEC_LOOP 1
|
||||
# else
|
||||
# define LZ4_FAST_DEC_LOOP 0
|
||||
@@ -367,16 +367,35 @@ LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const si
|
||||
BYTE v[8];
|
||||
switch(offset) {
|
||||
case 1:
|
||||
memset(v, *srcPtr, 8);
|
||||
if(sizeof(void*) == 8) {
|
||||
U64 m = *srcPtr * 0x0101010101010101;
|
||||
memcpy(v, &m, 8);
|
||||
} else {
|
||||
memset(v, *srcPtr, 8);
|
||||
}
|
||||
goto copy_loop;
|
||||
case 2:
|
||||
memcpy(v, srcPtr, 2);
|
||||
memcpy(&v[2], srcPtr, 2);
|
||||
memcpy(&v[4], &v[0], 4);
|
||||
if(sizeof(void*) == 8) {
|
||||
U16 m;
|
||||
memcpy(&m, srcPtr, 2);
|
||||
U64 n = m * 0x0001000100010001;
|
||||
memcpy(v, &n, 8);
|
||||
} else {
|
||||
memcpy(v, srcPtr, 2);
|
||||
memcpy(&v[2], srcPtr, 2);
|
||||
memcpy(&v[4], &v[0], 4);
|
||||
}
|
||||
goto copy_loop;
|
||||
case 4:
|
||||
memcpy(v, srcPtr, 4);
|
||||
memcpy(&v[4], srcPtr, 4);
|
||||
if(sizeof(void*) == 8) {
|
||||
U32 m;
|
||||
memcpy(&m, srcPtr, 4);
|
||||
U64 n = m | (U64(m) << 32);
|
||||
memcpy(v, &n, 8);
|
||||
} else {
|
||||
memcpy(v, srcPtr, 4);
|
||||
memcpy(&v[4], srcPtr, 4);
|
||||
}
|
||||
goto copy_loop;
|
||||
default:
|
||||
LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
|
||||
@@ -454,14 +473,13 @@ static int g_debuglog_enable = 1;
|
||||
/*-************************************
|
||||
* Common functions
|
||||
**************************************/
|
||||
static unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
LZ4_FORCE_INLINE
|
||||
unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
{
|
||||
if (LZ4_isLittleEndian()) {
|
||||
if (sizeof(val)==8) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
unsigned long r = 0;
|
||||
_BitScanForward64( &r, (U64)val );
|
||||
return (int)(r>>3);
|
||||
return (unsigned)_tzcnt_u64((U64)val)>>3;
|
||||
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_ctzll((U64)val) >> 3);
|
||||
# else
|
||||
@@ -477,9 +495,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
# endif
|
||||
} else /* 32 bits */ {
|
||||
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
unsigned long r;
|
||||
_BitScanForward( &r, (U32)val );
|
||||
return (int)(r>>3);
|
||||
return (unsigned)_tzcnt_u32((U32)val)>>3;
|
||||
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_ctz((U32)val) >> 3);
|
||||
# else
|
||||
@@ -613,7 +629,7 @@ int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int
|
||||
/*-******************************
|
||||
* Compression functions
|
||||
********************************/
|
||||
static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
|
||||
LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
|
||||
{
|
||||
if (tableType == byU16)
|
||||
return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
|
||||
@@ -621,7 +637,7 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
|
||||
return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
|
||||
}
|
||||
|
||||
static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
|
||||
LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
|
||||
{
|
||||
const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
|
||||
if (LZ4_isLittleEndian()) {
|
||||
@@ -639,7 +655,7 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
|
||||
return LZ4_hash4(LZ4_read32(p), tableType);
|
||||
}
|
||||
|
||||
static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
|
||||
LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
|
||||
{
|
||||
switch (tableType)
|
||||
{
|
||||
@@ -651,7 +667,7 @@ static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t cons
|
||||
}
|
||||
}
|
||||
|
||||
static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
|
||||
LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
|
||||
void* tableBase, tableType_t const tableType,
|
||||
const BYTE* srcBase)
|
||||
{
|
||||
@@ -676,7 +692,7 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_
|
||||
* Assumption 1 : only valid if tableType == byU32 or byU16.
|
||||
* Assumption 2 : h is presumed valid (within limits of hash table)
|
||||
*/
|
||||
static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
|
||||
LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
|
||||
{
|
||||
LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
|
||||
if (tableType == byU32) {
|
||||
@@ -692,7 +708,7 @@ static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableTyp
|
||||
assert(0); return 0; /* forbidden case */
|
||||
}
|
||||
|
||||
static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
|
||||
LZ4_FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
|
||||
{
|
||||
if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
|
||||
if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
|
||||
|
||||
@@ -122,7 +122,7 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
|
||||
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
|
||||
*/
|
||||
#ifndef LZ4_MEMORY_USAGE
|
||||
# define LZ4_MEMORY_USAGE 12
|
||||
# define LZ4_MEMORY_USAGE 14
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -1,255 +0,0 @@
|
||||
// Copyright (c) 2015 Jeff Preshing
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef __TRACY_CPP11OM_SEMAPHORE_H__
|
||||
#define __TRACY_CPP11OM_SEMAPHORE_H__
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
#if defined(__MACH__)
|
||||
#include <mach/mach.h>
|
||||
#elif defined(__unix__)
|
||||
#include <semaphore.h>
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined(_WIN32)
|
||||
//---------------------------------------------------------
|
||||
// Semaphore (Windows)
|
||||
//---------------------------------------------------------
|
||||
#ifndef MAXLONG
|
||||
enum { MAXLONG = 0x7fffffff };
|
||||
#endif
|
||||
|
||||
#ifndef INFINITE
|
||||
enum { INFINITE = 0xFFFFFFFF };
|
||||
#endif
|
||||
|
||||
#ifndef _WINDOWS_
|
||||
typedef void* HANDLE;
|
||||
|
||||
extern "C" __declspec(dllimport) HANDLE __stdcall CreateSemaphoreA( void*, long, long, const char* );
|
||||
extern "C" __declspec(dllimport) int __stdcall CloseHandle( HANDLE );
|
||||
extern "C" __declspec(dllimport) unsigned long __stdcall WaitForSingleObject( HANDLE, unsigned long );
|
||||
extern "C" __declspec(dllimport) int __stdcall ReleaseSemaphore( HANDLE, long, long* );
|
||||
#endif
|
||||
|
||||
class Semaphore
|
||||
{
|
||||
private:
|
||||
HANDLE m_hSema;
|
||||
|
||||
Semaphore(const Semaphore& other) = delete;
|
||||
Semaphore& operator=(const Semaphore& other) = delete;
|
||||
|
||||
public:
|
||||
Semaphore(int initialCount = 0)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
m_hSema = CreateSemaphoreA(NULL, initialCount, MAXLONG, NULL);
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
CloseHandle(m_hSema);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
WaitForSingleObject(m_hSema, INFINITE);
|
||||
}
|
||||
|
||||
void signal(int count = 1)
|
||||
{
|
||||
ReleaseSemaphore(m_hSema, count, NULL);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#elif defined(__MACH__)
|
||||
//---------------------------------------------------------
|
||||
// Semaphore (Apple iOS and OSX)
|
||||
// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
|
||||
//---------------------------------------------------------
|
||||
|
||||
class Semaphore
|
||||
{
|
||||
private:
|
||||
semaphore_t m_sema;
|
||||
|
||||
Semaphore(const Semaphore& other) = delete;
|
||||
Semaphore& operator=(const Semaphore& other) = delete;
|
||||
|
||||
public:
|
||||
Semaphore(int initialCount = 0)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
semaphore_destroy(mach_task_self(), m_sema);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
semaphore_wait(m_sema);
|
||||
}
|
||||
|
||||
void signal()
|
||||
{
|
||||
semaphore_signal(m_sema);
|
||||
}
|
||||
|
||||
void signal(int count)
|
||||
{
|
||||
while (count-- > 0)
|
||||
{
|
||||
semaphore_signal(m_sema);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#elif defined(__unix__)
|
||||
//---------------------------------------------------------
|
||||
// Semaphore (POSIX, Linux)
|
||||
//---------------------------------------------------------
|
||||
|
||||
class Semaphore
|
||||
{
|
||||
private:
|
||||
sem_t m_sema;
|
||||
|
||||
Semaphore(const Semaphore& other) = delete;
|
||||
Semaphore& operator=(const Semaphore& other) = delete;
|
||||
|
||||
public:
|
||||
Semaphore(int initialCount = 0)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
sem_init(&m_sema, 0, initialCount);
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
sem_destroy(&m_sema);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
|
||||
int rc;
|
||||
do
|
||||
{
|
||||
rc = sem_wait(&m_sema);
|
||||
}
|
||||
while (rc == -1 && errno == EINTR);
|
||||
}
|
||||
|
||||
void signal()
|
||||
{
|
||||
sem_post(&m_sema);
|
||||
}
|
||||
|
||||
void signal(int count)
|
||||
{
|
||||
while (count-- > 0)
|
||||
{
|
||||
sem_post(&m_sema);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#error Unsupported platform!
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
//---------------------------------------------------------
|
||||
// LightweightSemaphore
|
||||
//---------------------------------------------------------
|
||||
class LightweightSemaphore
|
||||
{
|
||||
private:
|
||||
std::atomic<int> m_count;
|
||||
Semaphore m_sema;
|
||||
|
||||
void waitWithPartialSpinning()
|
||||
{
|
||||
int oldCount;
|
||||
// Is there a better way to set the initial spin count?
|
||||
// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
|
||||
// as threads start hitting the kernel semaphore.
|
||||
int spin = 10000;
|
||||
while (spin--)
|
||||
{
|
||||
oldCount = m_count.load(std::memory_order_relaxed);
|
||||
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))
|
||||
return;
|
||||
std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop.
|
||||
}
|
||||
oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
|
||||
if (oldCount <= 0)
|
||||
{
|
||||
m_sema.wait();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
LightweightSemaphore(int initialCount = 0) : m_count(initialCount)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
}
|
||||
|
||||
bool tryWait()
|
||||
{
|
||||
int oldCount = m_count.load(std::memory_order_relaxed);
|
||||
return (oldCount > 0 && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
if (!tryWait())
|
||||
waitWithPartialSpinning();
|
||||
}
|
||||
|
||||
void signal(int count = 1)
|
||||
{
|
||||
int oldCount = m_count.fetch_add(count, std::memory_order_release);
|
||||
int toRelease = -oldCount < count ? -oldCount : count;
|
||||
if (toRelease > 0)
|
||||
{
|
||||
m_sema.signal(toRelease);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef LightweightSemaphore DefaultSemaphoreType;
|
||||
|
||||
}
|
||||
|
||||
#endif // __CPP11OM_SEMAPHORE_H__
|
||||
71
common/unix.mk
Normal file
@@ -0,0 +1,71 @@
|
||||
# Common code needed by most Tracy Unix Makefiles.
|
||||
|
||||
# Ensure these are simply-substituted variables, without changing their values.
|
||||
LIBS := $(LIBS)
|
||||
|
||||
# Tracy does not use TBB directly, but the implementation of parallel algorithms
|
||||
# in some versions of libstdc++ depends on TBB. When it does, you must
|
||||
# explicitly link against -ltbb.
|
||||
#
|
||||
# Some distributions have pgk-config files for TBB, others don't.
|
||||
ifeq (0,$(shell pkg-config --libs tbb >/dev/null 2>&1; echo $$?))
|
||||
LIBS += $(shell pkg-config --libs tbb)
|
||||
else ifeq (0,$(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?))
|
||||
LIBS += -ltbb
|
||||
endif
|
||||
|
||||
OBJDIRBASE := obj/$(BUILD)
|
||||
OBJDIR := $(OBJDIRBASE)/o/o/o
|
||||
|
||||
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
|
||||
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
|
||||
OBJ3 := $(addprefix $(OBJDIR)/,$(SRC3:%.m=%.o))
|
||||
|
||||
all: $(IMAGE)
|
||||
|
||||
$(OBJDIR)/%.o: %.cpp
|
||||
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.cpp
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.c
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.m
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.m
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
ifeq (yes,$(SHARED_LIBRARY))
|
||||
$(IMAGE): $(OBJ) $(OBJ2)
|
||||
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -shared -o $@
|
||||
else
|
||||
$(IMAGE): $(OBJ) $(OBJ2) $(OBJ3)
|
||||
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ3) $(LIBS) -o $@
|
||||
endif
|
||||
|
||||
ifneq "$(MAKECMDGOALS)" "clean"
|
||||
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) $(addprefix $(OBJDIR)/,$(SRC3:.m=.d))
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJDIRBASE) $(IMAGE)*
|
||||
|
||||
.PHONY: clean all
|
||||
12
csvexport/build/unix/Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
all: debug
|
||||
|
||||
debug:
|
||||
@+make -f debug.mk all
|
||||
|
||||
release:
|
||||
@+make -f release.mk all
|
||||
|
||||
clean:
|
||||
@+make -f build.mk clean
|
||||
|
||||
.PHONY: all clean debug release
|
||||
12
csvexport/build/unix/build.mk
Normal file
@@ -0,0 +1,12 @@
|
||||
CFLAGS +=
|
||||
CXXFLAGS := $(CFLAGS) -std=gnu++17
|
||||
# DEFINES += -DTRACY_NO_STATISTICS
|
||||
INCLUDES := $(shell pkg-config --cflags capstone)
|
||||
LIBS := $(shell pkg-config --libs capstone) -lpthread
|
||||
PROJECT := csvexport
|
||||
IMAGE := $(PROJECT)-$(BUILD)
|
||||
|
||||
FILTER :=
|
||||
include ../../../common/src-from-vcxproj.mk
|
||||
|
||||
include ../../../common/unix.mk
|
||||
11
csvexport/build/unix/debug.mk
Normal file
@@ -0,0 +1,11 @@
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
CFLAGS := -g3 -Wall
|
||||
DEFINES := -DDEBUG
|
||||
BUILD := debug
|
||||
|
||||
ifeq ($(ARCH),x86_64)
|
||||
CFLAGS += -msse4.1
|
||||
endif
|
||||
|
||||
include build.mk
|
||||
7
csvexport/build/unix/release.mk
Normal file
@@ -0,0 +1,7 @@
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
CFLAGS := -O3 -s -march=native
|
||||
DEFINES := -DNDEBUG
|
||||
BUILD := release
|
||||
|
||||
include build.mk
|
||||
31
csvexport/build/win32/csvexport.sln
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30225.117
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csvexport", "csvexport.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x86.Build.0 = Debug|Win32
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x86.ActiveCfg = Release|Win32
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
235
csvexport/build/win32/csvexport.vcxproj
Normal file
@@ -0,0 +1,235 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
|
||||
<RootNamespace>capture</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\debug\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c" />
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
|
||||
<ClCompile Include="..\..\..\zstd\debug.c" />
|
||||
<ClCompile Include="..\..\..\zstd\entropy_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\error_private.c" />
|
||||
<ClCompile Include="..\..\..\zstd\fse_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\fse_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\hist.c" />
|
||||
<ClCompile Include="..\..\..\zstd\huf_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\pool.c" />
|
||||
<ClCompile Include="..\..\..\zstd\threading.c" />
|
||||
<ClCompile Include="..\..\..\zstd\xxhash.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstdmt_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_lazy.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ldm.c" />
|
||||
<ClCompile Include="..\..\..\zstd\zstd_opt.c" />
|
||||
<ClCompile Include="..\..\src\csvexport.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyAlloc.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyColor.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h" />
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
|
||||
<ClInclude Include="..\..\..\zstd\bitstream.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compiler.h" />
|
||||
<ClInclude Include="..\..\..\zstd\cpu.h" />
|
||||
<ClInclude Include="..\..\..\zstd\debug.h" />
|
||||
<ClInclude Include="..\..\..\zstd\error_private.h" />
|
||||
<ClInclude Include="..\..\..\zstd\fse.h" />
|
||||
<ClInclude Include="..\..\..\zstd\hist.h" />
|
||||
<ClInclude Include="..\..\..\zstd\huf.h" />
|
||||
<ClInclude Include="..\..\..\zstd\mem.h" />
|
||||
<ClInclude Include="..\..\..\zstd\pool.h" />
|
||||
<ClInclude Include="..\..\..\zstd\threading.h" />
|
||||
<ClInclude Include="..\..\..\zstd\xxhash.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstdmt_compress.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_lazy.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ldm.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_opt.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
300
csvexport/build/win32/csvexport.vcxproj.filters
Normal file
@@ -0,0 +1,300 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="src">
|
||||
<UniqueIdentifier>{729c80ee-4d26-4a5e-8f1f-6c075783eb56}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="server">
|
||||
<UniqueIdentifier>{cf23ef7b-7694-4154-830b-00cf053350ea}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="common">
|
||||
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd">
|
||||
<UniqueIdentifier>{043ecb94-f240-4986-94b0-bc5bbd415a82}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="getopt">
|
||||
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyPrint.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\debug.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\entropy_common.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\error_private.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\fse_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\fse_decompress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\hist.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\huf_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\huf_decompress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\pool.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\threading.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\xxhash.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_common.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_literals.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_sequences.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_compress_superblock.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ddict.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_decompress_block.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_double_fast.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_fast.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_lazy.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_ldm.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstd_opt.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\zstdmt_compress.c">
|
||||
<Filter>zstd</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c">
|
||||
<Filter>getopt</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\csvexport.cpp">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlloc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyColor.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyPrint.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\bitstream.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compiler.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\cpu.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\debug.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\error_private.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\fse.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\hist.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\huf.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\mem.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\pool.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\threading.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\xxhash.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_internal.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_literals.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_sequences.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_compress_superblock.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_cwksp.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ddict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_block.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_decompress_internal.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_double_fast.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_fast.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_internal.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_lazy.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_ldm.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_opt.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstdmt_compress.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h">
|
||||
<Filter>getopt</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
311
csvexport/src/csvexport.cpp
Normal file
@@ -0,0 +1,311 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../server/TracyFileRead.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "../../getopt/getopt.h"
|
||||
|
||||
// Writes the command-line help text to stderr and terminates the process.
//
// e: status passed straight to exit(). parse_args() uses 0 for an explicit
//    -h/--help request and 1 for every usage error.
void print_usage_exit(int e)
{
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, " extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " -h, --help Print usage\n");
    // The inner quotes must be escaped: written as a bare "" they end and
    // reopen the literal, so the help text silently showed "(default: )".
    fprintf(stderr, " -f, --filter arg Filter zone names (default: \"\")\n");
    fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
    fprintf(stderr, " -c, --case Case sensitive filtering\n");
    fprintf(stderr, " -e, --self Get self times\n");
    fprintf(stderr, " -u, --unwrap Report each zone event\n");

    exit(e);
}
|
||||
|
||||
// Options gathered from the command line by parse_args().
// Member order matters: parse_args() brace-initialises this struct
// positionally, so the three strings must precede the three flags.
struct Args {
    const char *filter, *separator, *trace_file;  // zone-name filter, CSV separator, input path
    bool case_sensitive, self_time, unwrap;       // -c, -e and -u switches
};
|
||||
|
||||
// Parses the command line into an Args value.
//
// Prints the usage text and exits (status 1) when no arguments are given,
// when an unknown option is seen, or when the number of positional arguments
// is not exactly one. -h/--help prints the usage text and exits with 0.
//
// Returns the populated Args; trace_file points into argv.
Args parse_args(int argc, char** argv)
{
    if (argc == 1)
    {
        print_usage_exit(1);
    }

    // Defaults: empty filter, comma separator, all switches off.
    Args args = { "", ",", "", false, false, false };

    // --filter and --sep require a value, matching the "f:" / "s:" short
    // option spec below. Declaring them optional_argument made
    // "--filter foo" leave optarg NULL, which later crashed on
    // args.filter[0] / printing the separator.
    struct option long_opts[] = {
        { "help", no_argument, NULL, 'h' },
        { "filter", required_argument, NULL, 'f' },
        { "sep", required_argument, NULL, 's' },
        { "case", no_argument, NULL, 'c' },
        { "self", no_argument, NULL, 'e' },
        { "unwrap", no_argument, NULL, 'u' },
        { NULL, 0, NULL, 0 }
    };

    int c;
    while ((c = getopt_long(argc, argv, "hf:s:ceu", long_opts, NULL)) != -1)
    {
        switch (c)
        {
        case 'h':
            print_usage_exit(0);
            break;
        case 'f':
            args.filter = optarg;
            break;
        case 's':
            args.separator = optarg;
            break;
        case 'c':
            args.case_sensitive = true;
            break;
        case 'e':
            args.self_time = true;
            break;
        case 'u':
            args.unwrap = true;
            break;
        default:
            print_usage_exit(1);
            break;
        }
    }

    // Exactly one positional argument (the trace file) must remain.
    if (argc != optind + 1)
    {
        print_usage_exit(1);
    }

    args.trace_file = argv[optind];

    return args;
}
|
||||
|
||||
// Reports whether `term` occurs anywhere inside `s`.
//
// term:           text to look for (an empty term always matches).
// s:              text to search in.
// case_sensitive: when false (the default) both strings are lower-cased with
//                 std::tolower before the comparison.
bool is_substring(
    const char* term,
    const char* s,
    bool case_sensitive = false
){
    std::string needle(term);
    std::string haystack(s);

    if (!case_sensitive)
    {
        // Go through unsigned char so std::tolower never sees a negative value.
        const auto lower_in_place = [](std::string& text) {
            for (char& ch : text)
            {
                ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
            }
        };
        lower_in_place(needle);
        lower_in_place(haystack);
    }

    const bool found = haystack.find(needle) != std::string::npos;
    return found;
}
|
||||
|
||||
// Returns the display string for source location `id`: the location's `name`
// when it is marked active, otherwise its `function` string.
const char* get_name(int32_t id, const tracy::Worker& worker)
{
    const auto& location = worker.GetSourceLocation(id);
    if (location.name.active)
    {
        return worker.GetString(location.name);
    }
    return worker.GetString(location.function);
}
|
||||
|
||||
// Streams every element of `v` into one string, inserting `sep` between
// consecutive elements. An empty container yields an empty string.
//
// T may be any range usable in a range-based for loop whose elements can be
// written to a std::ostream. The first element is tracked with a flag rather
// than by comparing addresses against v[0], so containers without operator[]
// (std::list, std::set, ...) work as well.
template <typename T>
std::string join(const T& v, const char* sep) {
    std::ostringstream s;
    bool first = true;
    for (const auto& i : v) {
        if (!first) {
            s << sep;
        }
        first = false;
        s << i;
    }
    return s.str();
}
|
||||
|
||||
// From TracyView.cpp
|
||||
int64_t GetZoneChildTimeFast(
|
||||
const tracy::Worker& worker,
|
||||
const tracy::ZoneEvent& zone
|
||||
){
|
||||
int64_t time = 0;
|
||||
if( zone.HasChildren() )
|
||||
{
|
||||
auto& children = worker.GetZoneChildren( zone.Child() );
|
||||
if( children.is_magic() )
|
||||
{
|
||||
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
|
||||
for( auto& v : vec )
|
||||
{
|
||||
assert( v.IsEndValid() );
|
||||
time += v.End() - v.Start();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( auto& v : children )
|
||||
{
|
||||
assert( v->IsEndValid() );
|
||||
time += v->End() - v->Start();
|
||||
}
|
||||
}
|
||||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!AttachConsole(ATTACH_PARENT_PROCESS))
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
|
||||
}
|
||||
#endif
|
||||
|
||||
Args args = parse_args(argc, argv);
|
||||
|
||||
auto f = std::unique_ptr<tracy::FileRead>(
|
||||
tracy::FileRead::Open(args.trace_file)
|
||||
);
|
||||
if (!f)
|
||||
{
|
||||
fprintf(stderr, "Could not open file %s\n", args.trace_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto worker = tracy::Worker(*f);
|
||||
|
||||
while (!worker.AreSourceLocationZonesReady())
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
auto& slz = worker.GetSourceLocationZones();
|
||||
tracy::Vector<decltype(slz.begin())> slz_selected;
|
||||
slz_selected.reserve(slz.size());
|
||||
|
||||
uint32_t total_cnt = 0;
|
||||
for(auto it = slz.begin(); it != slz.end(); ++it)
|
||||
{
|
||||
if(it->second.total != 0)
|
||||
{
|
||||
++total_cnt;
|
||||
if(args.filter[0] == '\0')
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto name = get_name(it->first, worker);
|
||||
if(is_substring(args.filter, name, args.case_sensitive))
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> columns;
|
||||
if (args.unwrap)
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns"
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "total_ns", "total_perc",
|
||||
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
|
||||
};
|
||||
}
|
||||
std::string header = join(columns, args.separator);
|
||||
printf("%s\n", header.data());
|
||||
|
||||
const auto last_time = worker.GetLastTime();
|
||||
for(auto& it : slz_selected)
|
||||
{
|
||||
std::vector<std::string> values(columns.size());
|
||||
|
||||
values[0] = get_name(it->first, worker);
|
||||
|
||||
const auto& srcloc = worker.GetSourceLocation(it->first);
|
||||
values[1] = worker.GetString(srcloc.file);
|
||||
values[2] = std::to_string(srcloc.line);
|
||||
|
||||
const auto& zone_data = it->second;
|
||||
|
||||
if (args.unwrap)
|
||||
{
|
||||
int i = 0;
|
||||
for (const auto& zone_thread_data : zone_data.zones) {
|
||||
const auto zone_event = zone_thread_data.Zone();
|
||||
const auto start = zone_event->Start();
|
||||
const auto end = zone_event->End();
|
||||
|
||||
values[3] = std::to_string(start);
|
||||
|
||||
auto timespan = end - start;
|
||||
if (args.self_time) {
|
||||
timespan -= GetZoneChildTimeFast(worker, *zone_event);
|
||||
}
|
||||
values[4] = std::to_string(timespan);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
|
||||
values[3] = std::to_string(time);
|
||||
values[4] = std::to_string(100. * time / last_time);
|
||||
|
||||
values[5] = std::to_string(zone_data.zones.size());
|
||||
|
||||
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
|
||||
/ zone_data.zones.size();
|
||||
values[6] = std::to_string(avg);
|
||||
|
||||
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
|
||||
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
|
||||
values[7] = std::to_string(tmin);
|
||||
values[8] = std::to_string(tmax);
|
||||
|
||||
const auto sz = zone_data.zones.size();
|
||||
const auto ss = zone_data.sumSq
|
||||
- 2. * zone_data.total * avg
|
||||
+ avg * avg * sz;
|
||||
const auto std = sqrt(ss / (sz - 1));
|
||||
values[9] = std::to_string(std);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
BIN
doc/compare.png
|
Before Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 16 KiB |
BIN
doc/locks.png
|
Before Width: | Height: | Size: 8.3 KiB |
BIN
doc/messages.png
|
Before Width: | Height: | Size: 4.6 KiB |
BIN
doc/plot.png
|
Before Width: | Height: | Size: 9.3 KiB |
BIN
doc/profiler.png
|
Before Width: | Height: | Size: 72 KiB After Width: | Height: | Size: 284 KiB |
BIN
doc/profiler2.png
Normal file
|
After Width: | Height: | Size: 162 KiB |
14
examples/OpenCLVectorAdd/CMakeLists.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
# The OpenCL::OpenCL imported target used below is only provided by
# FindOpenCL from CMake 3.7 on, so 3.0 was too low a minimum.
cmake_minimum_required(VERSION 3.7)

project(OpenCLVectorAdd)

find_package(OpenCL REQUIRED)
find_package(Threads REQUIRED)

add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)

# Tracy client built as a static library with profiling enabled.
add_library(TracyClient STATIC ../../TracyClient.cpp
                               ../../TracyOpenCL.hpp)
target_include_directories(TracyClient PUBLIC ../../)
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)
# The Tracy client needs the threading and dynamic-loading libraries on Unix;
# CMAKE_DL_LIBS is empty on platforms that have no separate libdl.
target_link_libraries(TracyClient PUBLIC Threads::Threads ${CMAKE_DL_LIBS})

target_link_libraries(OpenCLVectorAdd PUBLIC OpenCL::OpenCL TracyClient)
|
||||
190
examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp
Normal file
@@ -0,0 +1,190 @@
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <Tracy.hpp>
|
||||
#include <TracyOpenCL.hpp>
|
||||
|
||||
// Checks an OpenCL status expression: anything other than CL_SUCCESS is
// printed to stderr and then trips assert(false).
//
// The argument is evaluated exactly once and stored, so passing a call such
// as CL_ASSERT(clFinish(queue)) no longer re-runs the call when reporting a
// failure. The do { } while(0) wrapper makes the macro a single statement,
// which keeps it safe inside unbraced if/else. Use it with a trailing ';'.
#define CL_ASSERT(err)                                                            \
    do                                                                            \
    {                                                                             \
        const auto clAssertStatus_ = (err);                                       \
        if(clAssertStatus_ != CL_SUCCESS)                                         \
        {                                                                         \
            std::cerr << "OpenCL Call Returned " << clAssertStatus_ << std::endl; \
            assert(false);                                                        \
        }                                                                         \
    } while(0)
|
||||
|
||||
// OpenCL C source of the "vectorAdd" kernel, handed to
// clCreateProgramWithSource() at run time. Each work item with global id
// i < N stores A[i] + B[i] into C[i].
const char kernelSource[] =
|
||||
" void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
|
||||
" { "
|
||||
" int i = get_global_id(0); "
|
||||
" if (i < N) { "
|
||||
" C[i] = A[i] + B[i]; "
|
||||
" } "
|
||||
" } ";
|
||||
|
||||
int main()
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue commandQueue;
|
||||
cl_kernel vectorAddKernel;
|
||||
cl_program program;
|
||||
cl_int err;
|
||||
cl_mem bufferA, bufferB, bufferC;
|
||||
|
||||
TracyCLCtx tracyCLCtx;
|
||||
|
||||
{
|
||||
ZoneScopedN("OpenCL Init");
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
|
||||
|
||||
if (numPlatforms == 0)
|
||||
{
|
||||
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
|
||||
|
||||
size_t platformNameBufferSize = 0;
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
|
||||
std::string platformName(platformNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Platform: " << platformName << std::endl;
|
||||
|
||||
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
|
||||
size_t deviceNameBufferSize = 0;
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
|
||||
std::string deviceName(deviceNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Device: " << deviceName << std::endl;
|
||||
|
||||
err = CL_SUCCESS;
|
||||
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
size_t kernelSourceLength = sizeof(kernelSource);
|
||||
const char* kernelSourceArray = { kernelSource };
|
||||
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
|
||||
{
|
||||
size_t programBuildLogBufferSize = 0;
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
|
||||
std::string programBuildLog(programBuildLogBufferSize, '\0');
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
|
||||
std::clog << programBuildLog << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
CL_ASSERT(err);
|
||||
}
|
||||
|
||||
tracyCLCtx = TracyCLContext(context, device);
|
||||
|
||||
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
|
||||
std::vector<float> hostA, hostB, hostC;
|
||||
|
||||
{
|
||||
ZoneScopedN("Host Data Init");
|
||||
hostA.resize(N);
|
||||
hostB.resize(N);
|
||||
hostC.resize(N);
|
||||
|
||||
std::iota(std::begin(hostA), std::end(hostA), 0);
|
||||
std::iota(std::begin(hostB), std::end(hostB), 0);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Host to Device Memory Copy");
|
||||
|
||||
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
cl_event writeBufferAEvent, writeBufferBEvent;
|
||||
{
|
||||
ZoneScopedN("Write Buffer A");
|
||||
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_TRUE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferAEvent);
|
||||
}
|
||||
{
|
||||
ZoneScopedN("Write Buffer B");
|
||||
TracyCLZone(tracyCLCtx, "Write BufferB");
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_TRUE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferBEvent);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 10; ++i)
|
||||
{
|
||||
ZoneScopedN("VectorAdd Kernel Launch");
|
||||
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
||||
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(int), &static_cast<int>(N)));
|
||||
|
||||
cl_event vectorAddKernelEvent;
|
||||
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
||||
|
||||
CL_ASSERT(clWaitForEvents(1, &vectorAddKernelEvent));
|
||||
|
||||
TracyCLZoneSetEvent(vectorAddKernelEvent);
|
||||
|
||||
cl_ulong kernelStartTime, kernelEndTime;
|
||||
CL_ASSERT(clGetEventProfilingInfo(vectorAddKernelEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernelStartTime, nullptr));
|
||||
CL_ASSERT(clGetEventProfilingInfo(vectorAddKernelEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernelEndTime, nullptr));
|
||||
std::cout << "VectorAdd Kernel Elapsed: " << ((kernelEndTime - kernelStartTime) / 1000) << " us" << std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Device to Host Memory Copy");
|
||||
TracyCLZone(tracyCLCtx, "Read Buffer C");
|
||||
|
||||
cl_event readbufferCEvent;
|
||||
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
|
||||
TracyCLZoneSetEvent(readbufferCEvent);
|
||||
}
|
||||
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
|
||||
{
|
||||
ZoneScopedN("Checking results");
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
assert(hostC[i] == hostA[i] + hostB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Results are correct!" << std::endl;
|
||||
|
||||
TracyCLDestroy(tracyCLCtx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
1
examples/ToyPathTracer/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
Windows/Compiled*Shader.h
|
||||
4
examples/ToyPathTracer/README
Normal file
@@ -0,0 +1,4 @@
|
||||
https://github.com/aras-p/ToyPathTracer
|
||||
|
||||
Modified to render only 10 frames. Client part requires 12 GB, server part
|
||||
requires 6.4 GB.
|
||||
33
examples/ToyPathTracer/Source/Config.h
Normal file
@@ -0,0 +1,33 @@
|
||||
|
||||
// Compile-time configuration switches.

#if defined(__APPLE__) && !defined(__METAL_VERSION__)
#include <TargetConditionals.h>
#endif

// Backbuffer width and height.
#define kBackbufferWidth 1280
#define kBackbufferHeight 720

// SIMD and threading are switched off only for Emscripten builds.
#ifndef __EMSCRIPTEN__
#define CPU_CAN_DO_SIMD 1
#define CPU_CAN_DO_THREADS 1
#else
#define CPU_CAN_DO_SIMD 0
#define CPU_CAN_DO_THREADS 0
#endif

// Rendering feature toggles and tunables.
#define DO_SAMPLES_PER_PIXEL 4
#define DO_ANIMATE_SMOOTHING 0.9f
#define DO_LIGHT_SAMPLING 1
#define DO_MITSUBA_COMPARE 0

// Set to 1 to run the path tracing on the GPU through a compute shader.
#define DO_COMPUTE_GPU 0
#define kCSGroupSizeX 8
#define kCSGroupSizeY 8
#define kCSMaxObjects 64

// Whether the float3 struct uses SSE/NEON (never together with GPU compute).
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)

// Whether the HitSpheres function uses SSE/NEON.
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)
|
||||
192
examples/ToyPathTracer/Source/MathSimd.h
Normal file
@@ -0,0 +1,192 @@
|
||||
#pragma once
|
||||
|
||||
// VM_INLINE: force-inline qualifier used by every function in this header.
#ifndef _MSC_VER
#define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline
#else
#define VM_INLINE __forceinline
#endif

// Number of float lanes in one SIMD vector.
#define kSimdWidth 4
|
||||
|
||||
// x86 (SSE) path. __aarch64__ is excluded as well: GCC/Clang on non-Apple
// 64-bit ARM define it instead of __arm64__, and those targets must fall
// through to the NEON branch rather than include the SSE headers.
#if !defined(__arm__) && !defined(__arm64__) && !defined(__aarch64__) && !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X)))
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = _mm_loadu_ps(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { m = _mm_set_ps(w, z, y, x); }
|
||||
VM_INLINE explicit float4(float v) { m = _mm_set_ps1(v); }
|
||||
VM_INLINE explicit float4(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
VM_INLINE float getW() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))); }
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = _mm_and_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = _mm_or_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator- (float4 a) { a.m = _mm_xor_ps(a.m, _mm_set1_ps(-0.0f)); return a; }
|
||||
VM_INLINE float4 min(float4 a, float4 b) { a.m = _mm_min_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 max(float4 a, float4 b) { a.m = _mm_max_ps(a.m, b.m); return a; }
|
||||
|
||||
// Horizontal minimum: returns the smallest of the four lanes of v.
VM_INLINE float hmin(float4 v)
{
    // After this step lane x = min(x, z) and lane y = min(y, w).
    const float4 pairwise = min(v, SHUFFLE4(v, 2, 3, 0, 0));
    // Fold lane y into lane x; lane x then holds the overall minimum.
    const float4 folded = min(pairwise, SHUFFLE4(pairwise, 1, 0, 0, 0));
    return folded.getX();
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v) { return _mm_movemask_ps(v.m); }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
|
||||
a.m = _mm_blendv_ps(a.m, b.m, cond.m);
|
||||
#else
|
||||
__m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31));
|
||||
a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m));
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
// Integer flavour of select(): picks 32-bit lanes of b where the high bit of
// the matching cond lane is set, lanes of a otherwise.
VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
{
    const __m128i condBits = _mm_castps_si128(cond.m);
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
    return _mm_blendv_epi8(a, b, condBits);
#else
    // Smear each lane's sign bit over the lane, then merge a and b through that mask.
    const __m128i laneMask = _mm_srai_epi32(condBits, 31);
    const __m128i fromB = _mm_and_si128(laneMask, b);
    const __m128i fromA = _mm_andnot_si128(laneMask, a);
    return _mm_or_si128(fromB, fromA);
#endif
}
|
||||
|
||||
// Lane-wise square root of four packed floats.
VM_INLINE float4 sqrtf(float4 v)
{
    v.m = _mm_sqrt_ps(v.m);
    return v;
}
|
||||
|
||||
#elif !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#define USE_NEON 1
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = vld1q_f32(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { float v[4] = {x, y, z, w}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float4(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float4(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); }
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
// ---- Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = vmulq_f32(a.m, b.m); return a; }

// ---- Lane-wise comparisons.
// The NEON compare intrinsics return uint32x4_t lane masks. bool4 stores its
// payload as float32x4_t, so the mask bits are reinterpreted explicitly instead
// of relying on implicit (lax) vector conversions, which not every compiler
// configuration accepts. The bit patterns are unchanged.
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m))); return a; }
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcltq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcleq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m)); return a; }

// ---- Bitwise combination of comparison masks (performed on the raw 32-bit lanes).
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m))); return a; }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m))); return a; }

// ---- Negation and lane-wise minimum / maximum.
VM_INLINE float4 operator- (float4 a) { a.m = vnegq_f32(a.m); return a; }
VM_INLINE float4 min(float4 a, float4 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float4 max(float4 a, float4 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
|
||||
|
||||
// Horizontal minimum of all four lanes, computed with two pairwise-min steps.
VM_INLINE float hmin(float4 v)
{
    // After the first step: lane0 = min(x, y), lane1 = min(z, w).
    float32x2_t reduced = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m));
    // After the second step: lane0 = min of all four.
    reduced = vpmin_f32(reduced, reduced);
    return vget_lane_f32(reduced, 0);
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
a.m = vbslq_f32(cond.m, b.m, a.m);
|
||||
return a;
|
||||
}
|
||||
// Integer flavour of select(): per bit, takes b where cond is set and a otherwise.
// Uses the signed-integer bit-select with an explicitly reinterpreted mask rather
// than pushing integer vectors through vbslq_f32 via implicit vector conversions.
// The selected bits are identical; cond is expected to hold all-ones/all-zeros lanes.
VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond)
{
    return vbslq_s32(vreinterpretq_u32_f32(cond.m), b, a);
}
|
||||
|
||||
// Lane-wise square root, computed as V * (1/sqrt(V)): a hardware reciprocal
// square-root estimate refined by three Newton-Raphson steps.
// Lanes equal to zero or +infinity are passed through unchanged, because the
// refinement would otherwise evaluate 0*inf / inf*0 and yield NaN for them.
VM_INLINE float4 sqrtf(float4 v)
{
    const float32x4_t V = v.m;

    // S converges towards 1/sqrt(V).
    float32x4_t S = vrsqrteq_f32(V);
    for (int step = 0; step < 3; ++step)
    {
        float32x4_t P = vmulq_f32(V, S);
        float32x4_t R = vrsqrtsq_f32(P, S);
        S = vmulq_f32(S, R);
    }
    float32x4_t result = vmulq_f32(V, S);

    // Special cases: sqrt(+-0) = +-0 and sqrt(+inf) = +inf, i.e. the input itself.
    const uint32x4_t isZero = vceqq_f32(V, vdupq_n_f32(0.0f));
    const uint32x4_t isPosInf = vceqq_u32(vreinterpretq_u32_f32(V), vdupq_n_u32(0x7F800000u));
    result = vbslq_f32(vorrq_u32(isZero, isPosInf), V, result);

    return float4(result);
}
|
||||
|
||||
// Broadcast one lane of v (X = lane 0 ... W = lane 3) to all four lanes.
VM_INLINE float4 splatX(float32x4_t v)
{
    return float4(vdupq_n_f32(vgetq_lane_f32(v, 0)));
}
VM_INLINE float4 splatY(float32x4_t v)
{
    return float4(vdupq_n_f32(vgetq_lane_f32(v, 1)));
}
VM_INLINE float4 splatZ(float32x4_t v)
{
    return float4(vdupq_n_f32(vgetq_lane_f32(v, 2)));
}
VM_INLINE float4 splatW(float32x4_t v)
{
    return float4(vdupq_n_f32(vgetq_lane_f32(v, 3)));
}
|
||||
|
||||
#endif
|
||||
203
examples/ToyPathTracer/Source/Maths.cpp
Normal file
@@ -0,0 +1,203 @@
|
||||
#include "Maths.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Advances a 32-bit xorshift generator (shift amounts 13, 17, 15).
// Updates `state` in place and returns the new state value.
// A state of 0 stays 0.
static uint32_t XorShift32(uint32_t& state)
{
    uint32_t next = state;
    next ^= next << 13;
    next ^= next >> 17;
    next ^= next << 15;
    return state = next;
}
|
||||
|
||||
float RandomFloat01(uint32_t& state)
|
||||
{
|
||||
return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
|
||||
}
|
||||
|
||||
// Rejection-samples a point with z = 0 whose squared length is below 1:
// draws x and y in [-1, 1) until the candidate falls inside the unit disk.
// Note: both RandomFloat01 calls are constructor arguments, so which one is
// evaluated first is up to the compiler.
float3 RandomInUnitDisk(uint32_t& state)
{
    for (;;)
    {
        float3 candidate = 2.0f * float3(RandomFloat01(state),RandomFloat01(state),0) - float3(1,1,0);
        if (!(dot(candidate,candidate) >= 1.0f))
            return candidate;
    }
}
|
||||
|
||||
// Rejection-samples a point whose squared length is below 1: draws each
// component in [-1, 1) until the candidate falls inside the unit sphere.
// Note: the three RandomFloat01 calls are constructor arguments, so their
// relative evaluation order is up to the compiler.
float3 RandomInUnitSphere(uint32_t& state)
{
    for (;;)
    {
        float3 candidate = 2.0f*float3(RandomFloat01(state),RandomFloat01(state),RandomFloat01(state)) - float3(1,1,1);
        if (!(sqLength(candidate) >= 1.0f))
            return candidate;
    }
}
|
||||
|
||||
// Builds a random direction from a height z in [-1, 1) and an angle in
// [0, 2*pi): the point (r*cos, r*sin, z) with r = sqrt(1 - z^2).
float3 RandomUnitVector(uint32_t& state)
{
    const float height = RandomFloat01(state) * 2.0f - 1.0f;
    const float angle = RandomFloat01(state) * 2.0f * kPI;
    const float ringRadius = sqrtf(1.0f - height * height);
    return float3(ringRadius * cosf(angle), ringRadius * sinf(angle), height);
}
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
|
||||
{
|
||||
#if DO_HIT_SPHERES_SIMD
|
||||
float4 hitT = float4(tMax);
|
||||
#if USE_NEON
|
||||
int32x4_t id = vdupq_n_s32(-1);
|
||||
#else
|
||||
__m128i id = _mm_set1_epi32(-1);
|
||||
#endif
|
||||
|
||||
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
|
||||
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
|
||||
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
|
||||
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
|
||||
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
|
||||
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
|
||||
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
|
||||
#elif DO_FLOAT3_WITH_SIMD
|
||||
float4 rOrigX = splatX(r.orig.m);
|
||||
float4 rOrigY = splatY(r.orig.m);
|
||||
float4 rOrigZ = splatZ(r.orig.m);
|
||||
float4 rDirX = splatX(r.dir.m);
|
||||
float4 rDirY = splatY(r.dir.m);
|
||||
float4 rDirZ = splatZ(r.dir.m);
|
||||
#else
|
||||
float4 rOrigX = float4(r.orig.x);
|
||||
float4 rOrigY = float4(r.orig.y);
|
||||
float4 rOrigZ = float4(r.orig.z);
|
||||
float4 rDirX = float4(r.dir.x);
|
||||
float4 rDirY = float4(r.dir.y);
|
||||
float4 rDirZ = float4(r.dir.z);
|
||||
#endif
|
||||
float4 tMin4 = float4(tMin);
|
||||
#if USE_NEON
|
||||
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
|
||||
#else
|
||||
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
|
||||
#endif
|
||||
// process 4 spheres at once
|
||||
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
|
||||
{
|
||||
// load data for 4 spheres
|
||||
float4 sCenterX = float4(spheres.centerX + i);
|
||||
float4 sCenterY = float4(spheres.centerY + i);
|
||||
float4 sCenterZ = float4(spheres.centerZ + i);
|
||||
float4 sSqRadius = float4(spheres.sqRadius + i);
|
||||
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
|
||||
float4 coX = sCenterX - rOrigX;
|
||||
float4 coY = sCenterY - rOrigY;
|
||||
float4 coZ = sCenterZ - rOrigZ;
|
||||
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
|
||||
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
|
||||
float4 discr = nb * nb - c;
|
||||
bool4 discrPos = discr > float4(0.0f);
|
||||
// if ray hits any of the 4 spheres
|
||||
if (any(discrPos))
|
||||
{
|
||||
float4 discrSq = sqrtf(discr);
|
||||
|
||||
// ray could hit spheres at t0 & t1
|
||||
float4 t0 = nb - discrSq;
|
||||
float4 t1 = nb + discrSq;
|
||||
|
||||
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
|
||||
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
|
||||
// if hit, take it
|
||||
id = select(id, curId, msk);
|
||||
hitT = select(hitT, t, msk);
|
||||
}
|
||||
#if USE_NEON
|
||||
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
|
||||
#else
|
||||
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
|
||||
#endif
|
||||
}
|
||||
// now we have up to 4 hits, find and return closest one
|
||||
float minT = hmin(hitT);
|
||||
if (minT < tMax) // any actual hits?
|
||||
{
|
||||
int minMask = mask(hitT == float4(minT));
|
||||
if (minMask != 0)
|
||||
{
|
||||
int id_scalar[4];
|
||||
float hitT_scalar[4];
|
||||
#if USE_NEON
|
||||
vst1q_s32(id_scalar, id);
|
||||
vst1q_f32(hitT_scalar, hitT.m);
|
||||
#else
|
||||
_mm_storeu_si128((__m128i *)id_scalar, id);
|
||||
_mm_storeu_ps(hitT_scalar, hitT.m);
|
||||
#endif
|
||||
|
||||
// In general, you would do this with a bit scan (first set/trailing zero count).
|
||||
// But who cares, it's only 16 options.
|
||||
static const int laneId[16] =
|
||||
{
|
||||
0, 0, 1, 0, // 00xx
|
||||
2, 0, 1, 0, // 01xx
|
||||
3, 0, 1, 0, // 10xx
|
||||
2, 0, 1, 0, // 11xx
|
||||
};
|
||||
|
||||
int lane = laneId[minMask];
|
||||
int hitId = id_scalar[lane];
|
||||
float finalHitT = hitT_scalar[lane];
|
||||
|
||||
outHit.pos = r.pointAt(finalHitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
|
||||
outHit.t = finalHitT;
|
||||
return hitId;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
|
||||
#else // #if DO_HIT_SPHERES_SIMD
|
||||
|
||||
float hitT = tMax;
|
||||
int id = -1;
|
||||
for (int i = 0; i < spheres.count; ++i)
|
||||
{
|
||||
float coX = spheres.centerX[i] - r.orig.getX();
|
||||
float coY = spheres.centerY[i] - r.orig.getY();
|
||||
float coZ = spheres.centerZ[i] - r.orig.getZ();
|
||||
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
|
||||
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
|
||||
float discr = nb * nb - c;
|
||||
if (discr > 0)
|
||||
{
|
||||
float discrSq = sqrtf(discr);
|
||||
|
||||
// Try earlier t
|
||||
float t = nb - discrSq;
|
||||
if (t <= tMin) // before min, try later t!
|
||||
t = nb + discrSq;
|
||||
|
||||
if (t > tMin && t < hitT)
|
||||
{
|
||||
id = i;
|
||||
hitT = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (id != -1)
|
||||
{
|
||||
outHit.pos = r.pointAt(hitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
|
||||
outHit.t = hitT;
|
||||
return id;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
#endif // #else of #if DO_HIT_SPHERES_SIMD
|
||||
}
|
||||
436
examples/ToyPathTracer/Source/Maths.h
Normal file
@@ -0,0 +1,436 @@
|
||||
#pragma once
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include "Config.h"
|
||||
#include "MathSimd.h"
|
||||
|
||||
#define kPI 3.1415926f
|
||||
|
||||
// SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/
|
||||
#if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
// SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
|
||||
// SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
|
||||
#define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
|
||||
VM_INLINE explicit float3(float v) { m = _mm_set1_ps(v); }
|
||||
VM_INLINE explicit float3(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
|
||||
VM_INLINE float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
|
||||
VM_INLINE float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = _mm_move_ss(m, _mm_set_ss(x));
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(y));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(z));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
// ---- Component-wise arithmetic, vector (op) vector.
VM_INLINE float3 operator+ (float3 a, float3 b) { return float3(_mm_add_ps(a.m, b.m)); }
VM_INLINE float3 operator- (float3 a, float3 b) { return float3(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float3 operator* (float3 a, float3 b) { return float3(_mm_mul_ps(a.m, b.m)); }
VM_INLINE float3 operator/ (float3 a, float3 b) { return float3(_mm_div_ps(a.m, b.m)); }

// ---- Vector (op) scalar and scalar (op) vector; the scalar is broadcast first.
VM_INLINE float3 operator* (float3 a, float b) { return float3(_mm_mul_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator/ (float3 a, float b) { return float3(_mm_div_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator* (float a, float3 b) { return float3(_mm_mul_ps(_mm_set1_ps(a), b.m)); }
VM_INLINE float3 operator/ (float a, float3 b) { return float3(_mm_div_ps(_mm_set1_ps(a), b.m)); }

// ---- Compound assignment, expressed through the binary operators.
VM_INLINE float3& operator+= (float3 &a, float3 b) { return a = a + b; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { return a = a - b; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { return a = a * b; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { return a = a / b; }
VM_INLINE float3& operator*= (float3 &a, float b) { return a = a * b; }
VM_INLINE float3& operator/= (float3 &a, float b) { return a = a / b; }

// ---- Component-wise comparisons; the result mask is carried in a bool3 (a float3 typedef).
VM_INLINE bool3 operator==(float3 a, float3 b) { return bool3(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool3 operator!=(float3 a, float3 b) { return bool3(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool3 operator< (float3 a, float3 b) { return bool3(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool3 operator> (float3 a, float3 b) { return bool3(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool3 operator<=(float3 a, float3 b) { return bool3(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool3 operator>=(float3 a, float3 b) { return bool3(_mm_cmpge_ps(a.m, b.m)); }

// ---- Component-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { return float3(_mm_min_ps(a.m, b.m)); }
VM_INLINE float3 max(float3 a, float3 b) { return float3(_mm_max_ps(a.m, b.m)); }

// Negation, computed as 0 - a.
VM_INLINE float3 operator- (float3 a) { return float3(_mm_sub_ps(_mm_setzero_ps(), a.m)); }
|
||||
|
||||
// Smallest of the three components: fold Y into X, then Z into X.
VM_INLINE float hmin(float3 v)
{
    float3 xyFolded = min(v, SHUFFLE3(v, 1, 0, 2));
    float3 allFolded = min(xyFolded, SHUFFLE3(xyFolded, 2, 0, 1));
    return allFolded.getX();
}

// Largest of the three components: fold Y into X, then Z into X.
VM_INLINE float hmax(float3 v)
{
    float3 xyFolded = max(v, SHUFFLE3(v, 1, 0, 2));
    float3 allFolded = max(xyFolded, SHUFFLE3(xyFolded, 2, 0, 1));
    return allFolded.getX();
}
|
||||
|
||||
// Cross product:
//   x = a.y*b.z - a.z*b.y
//   y = a.z*b.x - a.x*b.z
//   z = a.x*b.y - a.y*b.x
VM_INLINE float3 cross(float3 a, float3 b)
{
    // Computing the result in a rotated component order and rotating once at
    // the end saves a shuffle compared with the textbook formulation.
    float3 rotated = a.zxy()*b - a*b.zxy();
    return rotated.zxy();
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
|
||||
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
|
||||
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
|
||||
|
||||
#else // #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { float v[4] = {p[0], p[1], p[2], 0}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { float v[4] = {x, y, z, 0}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float3(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float3(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
|
||||
VM_INLINE float3 yzx() const
|
||||
{
|
||||
float32x2_t low = vget_low_f32(m);
|
||||
float32x4_t yzx = vcombine_f32(vext_f32(low, vget_high_f32(m), 1), low);
|
||||
return float3(yzx);
|
||||
}
|
||||
VM_INLINE float3 zxy() const
|
||||
{
|
||||
float32x4_t p = m;
|
||||
p = vuzpq_f32(vreinterpretq_f32_s32(vextq_s32(vreinterpretq_s32_f32(p), vreinterpretq_s32_f32(p), 1)), p).val[1];
|
||||
return float3(p);
|
||||
}
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = vsetq_lane_f32(x, m, 0);
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
m = vsetq_lane_f32(y, m, 1);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
m = vsetq_lane_f32(z, m, 2);
|
||||
}
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
// Approximate lane-wise reciprocal: hardware estimate followed by two
// Newton-Raphson refinement steps.
VM_INLINE float32x4_t rcp_2(float32x4_t v)
{
    float32x4_t estimate = vrecpeq_f32(v);
    for (int step = 0; step < 2; ++step)
        estimate = vmulq_f32(vrecpsq_f32(estimate, v), estimate);
    return estimate;
}
|
||||
|
||||
// ---- Component-wise arithmetic, vector (op) vector.
// Division multiplies by the refined reciprocal estimate from rcp_2().
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = vmulq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator/ (float3 a, float3 b) { float32x4_t recip = rcp_2(b.m); a.m = vmulq_f32(a.m, recip); return a; }

// ---- Vector (op) scalar and scalar (op) vector; the scalar is broadcast first.
VM_INLINE float3 operator* (float3 a, float b) { a.m = vmulq_f32(a.m, vdupq_n_f32(b)); return a; }
VM_INLINE float3 operator/ (float3 a, float b) { float32x4_t recip = rcp_2(vdupq_n_f32(b)); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float a, float3 b) { b.m = vmulq_f32(vdupq_n_f32(a), b.m); return b; }
VM_INLINE float3 operator/ (float a, float3 b) { float32x4_t recip = rcp_2(b.m); b.m = vmulq_f32(vdupq_n_f32(a), recip); return b; }

// ---- Compound assignment, expressed through the binary operators.
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }

// ---- Component-wise comparisons.
// The NEON compare intrinsics return uint32x4_t lane masks. bool3 stores its
// payload as float32x4_t, so the mask bits are reinterpreted explicitly instead
// of relying on implicit (lax) vector conversions, which not every compiler
// configuration accepts. The bit patterns are unchanged.
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m))); return a; }
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcltq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcleq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m)); return a; }

// ---- Component-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float3 max(float3 a, float3 b) { a.m = vmaxq_f32(a.m, b.m); return a; }

// Negation.
VM_INLINE float3 operator- (float3 a) { a.m = vnegq_f32(a.m); return a; }
|
||||
|
||||
// Smallest of the three components X, Y, Z.
// The fourth register lane is padding (0 after the three-value constructors)
// and must not take part in the reduction, so Z is duplicated in its place.
VM_INLINE float hmin(float3 v)
{
    float32x2_t xy = vget_low_f32(v.m);
    float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    float32x2_t reduced = vpmin_f32(xy, zz);   // (min(x, y), z)
    reduced = vpmin_f32(reduced, reduced);     // lane 0 = min(x, y, z)
    return vget_lane_f32(reduced, 0);
}

// Largest of the three components X, Y, Z; the padding lane is excluded the
// same way as in hmin().
VM_INLINE float hmax(float3 v)
{
    float32x2_t xy = vget_low_f32(v.m);
    float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    float32x2_t reduced = vpmax_f32(xy, zz);   // (max(x, y), z)
    reduced = vpmax_f32(reduced, reduced);     // lane 0 = max(x, y, z)
    return vget_lane_f32(reduced, 0);
}
|
||||
|
||||
// Cross product:
//   x = a.y*b.z - a.z*b.y
//   y = a.z*b.x - a.x*b.z
//   z = a.x*b.y - a.y*b.x
VM_INLINE float3 cross(float3 a, float3 b)
{
    // Computing the result in a rotated component order and rotating once at
    // the end saves a shuffle compared with the textbook formulation.
    float3 rotated = a.zxy()*b - a*b.zxy();
    return rotated.zxy();
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
|
||||
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
|
||||
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
|
||||
|
||||
|
||||
#endif // #else of #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
#else // #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// ---- Simple scalar C implementation
|
||||
|
||||
|
||||
struct float3
|
||||
{
|
||||
float3() : x(0), y(0), z(0) {}
|
||||
float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
|
||||
|
||||
float3 operator-() const { return float3(-x, -y, -z); }
|
||||
float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; }
|
||||
float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; }
|
||||
float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; }
|
||||
float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; }
|
||||
|
||||
VM_INLINE float getX() const { return x; }
|
||||
VM_INLINE float getY() const { return y; }
|
||||
VM_INLINE float getZ() const { return z; }
|
||||
VM_INLINE void setX(float x_) { x = x_; }
|
||||
VM_INLINE void setY(float y_) { y = y_; }
|
||||
VM_INLINE void setZ(float z_) { z = z_; }
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
// ---- Component-wise arithmetic.
VM_INLINE float3 operator+(const float3& a, const float3& b)
{
    return float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
VM_INLINE float3 operator-(const float3& a, const float3& b)
{
    return float3(a.x-b.x, a.y-b.y, a.z-b.z);
}
VM_INLINE float3 operator*(const float3& a, const float3& b)
{
    return float3(a.x*b.x, a.y*b.y, a.z*b.z);
}

// ---- Uniform scale, scalar on either side.
VM_INLINE float3 operator*(const float3& a, float b)
{
    return float3(a.x*b, a.y*b, a.z*b);
}
VM_INLINE float3 operator*(float a, const float3& b)
{
    return float3(a*b.x, a*b.y, a*b.z);
}

// Dot product.
VM_INLINE float dot(const float3& a, const float3& b)
{
    return a.x*b.x+a.y*b.y+a.z*b.z;
}

// Cross product; the Y component is written as the negated (x*z - z*x) term.
VM_INLINE float3 cross(const float3& a, const float3& b)
{
    return float3(
        a.y*b.z - a.z*b.y,
        -(a.x*b.z - a.z*b.x),
        a.x*b.y - a.y*b.x
    );
}
|
||||
#endif // #else of #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// Squared Euclidean length (v . v).
VM_INLINE float sqLength(float3 v)
{
    return dot(v, v);
}

// Euclidean length.
VM_INLINE float length(float3 v)
{
    return sqrtf(dot(v, v));
}

// v scaled by the reciprocal of its length. No guard against zero length.
VM_INLINE float3 normalize(float3 v)
{
    const float invLen = 1.0f / length(v);
    return v * invLen;
}

// Linear interpolation: a at t = 0, a + (b - a) at t = 1.
VM_INLINE float3 lerp(float3 a, float3 b, float t)
{
    return a + (b-a)*t;
}
|
||||
|
||||
|
||||
// Debug check that v is (approximately) unit length: its squared length must
// be within 0.01 of 1. Compiles to nothing when assert() is disabled.
inline void AssertUnit(float3 v)
{
    assert(fabsf(sqLength(v) - 1.0f) < 0.01f);
}
|
||||
|
||||
// Mirrors v about the plane with normal n: v - 2*(v.n)*n.
inline float3 reflect(float3 v, float3 n)
{
    const float twiceProjection = 2*dot(v,n);
    return v - twiceProjection*n;
}
|
||||
|
||||
// Computes the refracted direction of v (asserted to be unit length) at a
// surface with normal n, using the factor nint.
// Returns true and writes outRefracted when the discriminant is positive;
// returns false and leaves outRefracted untouched otherwise.
// NOTE(review): nint is presumably the ratio of refractive indices — confirm with callers.
inline bool refract(float3 v, float3 n, float nint, float3& outRefracted)
{
    AssertUnit(v);
    const float dt = dot(v, n);
    const float discr = 1.0f - nint*nint*(1-dt*dt);
    if (!(discr > 0))
        return false;

    outRefracted = nint * (v - n*dt) - n*sqrtf(discr);
    return true;
}
|
||||
// Evaluates r0 + (1 - r0) * (1 - cosine)^5 with r0 = ((1 - ri) / (1 + ri))^2.
inline float schlick(float cosine, float ri)
{
    const float ratio = (1-ri) / (1+ri);
    const float r0 = ratio*ratio;
    return r0 + (1-r0)*powf(1-cosine, 5);
}
|
||||
|
||||
struct Ray
|
||||
{
|
||||
Ray() {}
|
||||
Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); }
|
||||
|
||||
float3 pointAt(float t) const { return orig + dir * t; }
|
||||
|
||||
float3 orig;
|
||||
float3 dir;
|
||||
};
|
||||
|
||||
|
||||
struct Hit
|
||||
{
|
||||
float3 pos;
|
||||
float3 normal;
|
||||
float t;
|
||||
};
|
||||
|
||||
|
||||
struct Sphere
|
||||
{
|
||||
Sphere() : radius(1.0f), invRadius(0.0f) {}
|
||||
Sphere(float3 center_, float radius_) : center(center_), radius(radius_), invRadius(0.0f) {}
|
||||
|
||||
void UpdateDerivedData() { invRadius = 1.0f/radius; }
|
||||
|
||||
float3 center;
|
||||
float radius;
|
||||
float invRadius;
|
||||
};
|
||||
|
||||
|
||||
// data for all spheres in a "structure of arrays" layout
|
||||
struct SpheresSoA
|
||||
{
|
||||
SpheresSoA(int c)
|
||||
{
|
||||
count = c;
|
||||
// we'll be processing spheres in kSimdWidth chunks, so make sure to allocate
|
||||
// enough space
|
||||
simdCount = (c + (kSimdWidth - 1)) / kSimdWidth * kSimdWidth;
|
||||
centerX = new float[simdCount];
|
||||
centerY = new float[simdCount];
|
||||
centerZ = new float[simdCount];
|
||||
sqRadius = new float[simdCount];
|
||||
invRadius = new float[simdCount];
|
||||
// set all data to "impossible sphere" state
|
||||
for (int i = count; i < simdCount; ++i)
|
||||
{
|
||||
centerX[i] = centerY[i] = centerZ[i] = 10000.0f;
|
||||
sqRadius[i] = 0.0f;
|
||||
invRadius[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
~SpheresSoA()
|
||||
{
|
||||
delete[] centerX;
|
||||
delete[] centerY;
|
||||
delete[] centerZ;
|
||||
delete[] sqRadius;
|
||||
delete[] invRadius;
|
||||
}
|
||||
float* centerX;
|
||||
float* centerY;
|
||||
float* centerZ;
|
||||
float* sqRadius;
|
||||
float* invRadius;
|
||||
int simdCount;
|
||||
int count;
|
||||
};
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit);
|
||||
|
||||
float RandomFloat01(uint32_t& state);
|
||||
float3 RandomInUnitDisk(uint32_t& state);
|
||||
float3 RandomInUnitSphere(uint32_t& state);
|
||||
float3 RandomUnitVector(uint32_t& state);
|
||||
|
||||
struct Camera
|
||||
{
|
||||
Camera() {}
|
||||
// vfov is top to bottom in degrees
|
||||
Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
|
||||
{
|
||||
lensRadius = aperture / 2;
|
||||
float theta = vfov*kPI/180;
|
||||
float halfHeight = tanf(theta/2);
|
||||
float halfWidth = aspect * halfHeight;
|
||||
origin = lookFrom;
|
||||
w = normalize(lookFrom - lookAt);
|
||||
u = normalize(cross(vup, w));
|
||||
v = cross(w, u);
|
||||
lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;
|
||||
horizontal = 2*halfWidth*focusDist*u;
|
||||
vertical = 2*halfHeight*focusDist*v;
|
||||
}
|
||||
|
||||
Ray GetRay(float s, float t, uint32_t& state) const
|
||||
{
|
||||
float3 rd = lensRadius * RandomInUnitDisk(state);
|
||||
float3 offset = u * rd.getX() + v * rd.getY();
|
||||
return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + t*vertical - origin - offset));
|
||||
}
|
||||
|
||||
float3 origin;
|
||||
float3 lowerLeftCorner;
|
||||
float3 horizontal;
|
||||
float3 vertical;
|
||||
float3 u, v, w;
|
||||
float lensRadius;
|
||||
};
|
||||
|
||||
392
examples/ToyPathTracer/Source/Test.cpp
Normal file
@@ -0,0 +1,392 @@
|
||||
#include "Config.h"
|
||||
#include "Test.h"
|
||||
#include "Maths.h"
|
||||
#include <algorithm>
|
||||
#if CPU_CAN_DO_THREADS
|
||||
#include "enkiTS/TaskScheduler_c.h"
|
||||
#include <thread>
|
||||
#endif
|
||||
#include <atomic>
|
||||
|
||||
#include "../../../Tracy.hpp"
|
||||
|
||||
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
|
||||
#define DO_BIG_SCENE 1
|
||||
|
||||
// Scene geometry, one entry per sphere: {centre, radius}.
static Sphere s_Spheres[] =
{
    // One radius-100 sphere centred far below the others.
    {float3(0,-100.5,-1), 100},
    // Remaining spheres of the base scene.
    {float3(2,0,-1), 0.5f},
    {float3(0,0,-1), 0.5f},
    {float3(-2,0,-1), 0.5f},
    {float3(2,0,1), 0.5f},
    {float3(0,0,1), 0.5f},
    {float3(-2,0,1), 0.5f},
    {float3(0.5f,1,0.5f), 0.5f},
    {float3(-1.5f,1.5f,0.f), 0.3f},
#if DO_BIG_SCENE
    // Four rows (z = -3 .. -6) of nine spheres each, x from 4 down to -4.
    {float3(4,0,-3), 0.5f}, {float3(3,0,-3), 0.5f}, {float3(2,0,-3), 0.5f}, {float3(1,0,-3), 0.5f}, {float3(0,0,-3), 0.5f}, {float3(-1,0,-3), 0.5f}, {float3(-2,0,-3), 0.5f}, {float3(-3,0,-3), 0.5f}, {float3(-4,0,-3), 0.5f},
    {float3(4,0,-4), 0.5f}, {float3(3,0,-4), 0.5f}, {float3(2,0,-4), 0.5f}, {float3(1,0,-4), 0.5f}, {float3(0,0,-4), 0.5f}, {float3(-1,0,-4), 0.5f}, {float3(-2,0,-4), 0.5f}, {float3(-3,0,-4), 0.5f}, {float3(-4,0,-4), 0.5f},
    {float3(4,0,-5), 0.5f}, {float3(3,0,-5), 0.5f}, {float3(2,0,-5), 0.5f}, {float3(1,0,-5), 0.5f}, {float3(0,0,-5), 0.5f}, {float3(-1,0,-5), 0.5f}, {float3(-2,0,-5), 0.5f}, {float3(-3,0,-5), 0.5f}, {float3(-4,0,-5), 0.5f},
    {float3(4,0,-6), 0.5f}, {float3(3,0,-6), 0.5f}, {float3(2,0,-6), 0.5f}, {float3(1,0,-6), 0.5f}, {float3(0,0,-6), 0.5f}, {float3(-1,0,-6), 0.5f}, {float3(-2,0,-6), 0.5f}, {float3(-3,0,-6), 0.5f}, {float3(-4,0,-6), 0.5f},
    // One extra small sphere above the rows.
    {float3(1.5f,1.5f,-2), 0.3f},
#endif // #if DO_BIG_SCENE
};
|
||||
const int kSphereCount = sizeof(s_Spheres) / sizeof(s_Spheres[0]);
|
||||
|
||||
static SpheresSoA s_SpheresSoA(kSphereCount);
|
||||
|
||||
// Surface description for one sphere; s_SphereMats holds one entry per sphere.
struct Material
|
||||
{
|
||||
// Scattering model; selects which branch Scatter() takes.
enum Type { Lambert, Metal, Dielectric };
|
||||
Type type;
|
||||
// Colour multiplier that Scatter() returns as the attenuation for Lambert and Metal hits.
float3 albedo;
|
||||
// Emitted light colour; any component > 0 makes UpdateTest() register the sphere as a light.
float3 emissive;
|
||||
// Metal only: scales the random perturbation added to the mirror reflection.
float roughness;
|
||||
// Dielectric only: refractive index used by Scatter() for refraction and Schlick reflectance.
float ri;
|
||||
};
|
||||
|
||||
static Material s_SphereMats[kSphereCount] =
|
||||
{
|
||||
{ Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.8f, 0.4f, 0.4f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.4f, 0.4f, 0.8f), float3(0,0,0), 0, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.2f, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.6f, 0 },
|
||||
{ Material::Dielectric, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 1.5f },
|
||||
{ Material::Lambert, float3(0.8f, 0.6f, 0.2f), float3(30,25,15), 0, 0 },
|
||||
#if DO_BIG_SCENE
|
||||
{ Material::Lambert, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.1f, 0.2f, 0.5f), float3(3,10,20), 0, 0 },
|
||||
#endif
|
||||
};
|
||||
|
||||
static int s_EmissiveSpheres[kSphereCount];
|
||||
static int s_EmissiveSphereCount;
|
||||
|
||||
static Camera s_Cam;
|
||||
|
||||
const float kMinT = 0.001f;
|
||||
const float kMaxT = 1.0e7f;
|
||||
const int kMaxDepth = 10;
|
||||
|
||||
|
||||
bool HitWorld(const Ray& r, float tMin, float tMax, Hit& outHit, int& outID)
|
||||
{
|
||||
outID = HitSpheres(r, s_SpheresSoA, tMin, tMax, outHit);
|
||||
return outID != -1;
|
||||
}
|
||||
|
||||
|
||||
static bool Scatter(const Material& mat, const Ray& r_in, const Hit& rec, float3& attenuation, Ray& scattered, float3& outLightE, int& inoutRayCount, uint32_t& state)
|
||||
{
|
||||
ZoneScoped;
|
||||
outLightE = float3(0,0,0);
|
||||
if (mat.type == Material::Lambert)
|
||||
{
|
||||
// random point on unit sphere that is tangent to the hit point
|
||||
float3 target = rec.pos + rec.normal + RandomUnitVector(state);
|
||||
scattered = Ray(rec.pos, normalize(target - rec.pos));
|
||||
attenuation = mat.albedo;
|
||||
|
||||
// sample lights
|
||||
#if DO_LIGHT_SAMPLING
|
||||
for (int j = 0; j < s_EmissiveSphereCount; ++j)
|
||||
{
|
||||
int i = s_EmissiveSpheres[j];
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (&mat == &smat)
|
||||
continue; // skip self
|
||||
const Sphere& s = s_Spheres[i];
|
||||
|
||||
// create a random direction towards sphere
|
||||
// coord system for sampling: sw, su, sv
|
||||
float3 sw = normalize(s.center - rec.pos);
|
||||
float3 su = normalize(cross(fabs(sw.getX())>0.01f ? float3(0,1,0):float3(1,0,0), sw));
|
||||
float3 sv = cross(sw, su);
|
||||
// sample sphere by solid angle
|
||||
float cosAMax = sqrtf(1.0f - s.radius*s.radius / sqLength(rec.pos-s.center));
|
||||
float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
|
||||
float cosA = 1.0f - eps1 + eps1 * cosAMax;
|
||||
float sinA = sqrtf(1.0f - cosA*cosA);
|
||||
float phi = 2 * kPI * eps2;
|
||||
float3 l = su * (cosf(phi) * sinA) + sv * (sinf(phi) * sinA) + sw * cosA;
|
||||
//l = normalize(l); // NOTE(fg): This is already normalized, by construction.
|
||||
|
||||
// shoot shadow ray
|
||||
Hit lightHit;
|
||||
int hitID;
|
||||
++inoutRayCount;
|
||||
if (HitWorld(Ray(rec.pos, l), kMinT, kMaxT, lightHit, hitID) && hitID == i)
|
||||
{
|
||||
float omega = 2 * kPI * (1-cosAMax);
|
||||
|
||||
float3 rdir = r_in.dir;
|
||||
AssertUnit(rdir);
|
||||
float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
|
||||
outLightE += (mat.albedo * smat.emissive) * (std::max(0.0f, dot(l, nl)) * omega / kPI);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
else if (mat.type == Material::Metal)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 refl = reflect(r_in.dir, rec.normal);
|
||||
// reflected ray, and random inside of sphere based on roughness
|
||||
float roughness = mat.roughness;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
roughness = 0; // until we get better BRDF for metals
|
||||
#endif
|
||||
scattered = Ray(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
|
||||
attenuation = mat.albedo;
|
||||
return dot(scattered.dir, rec.normal) > 0;
|
||||
}
|
||||
else if (mat.type == Material::Dielectric)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 outwardN;
|
||||
float3 rdir = r_in.dir;
|
||||
float3 refl = reflect(rdir, rec.normal);
|
||||
float nint;
|
||||
attenuation = float3(1,1,1);
|
||||
float3 refr;
|
||||
float reflProb;
|
||||
float cosine;
|
||||
if (dot(rdir, rec.normal) > 0)
|
||||
{
|
||||
outwardN = -rec.normal;
|
||||
nint = mat.ri;
|
||||
cosine = mat.ri * dot(rdir, rec.normal);
|
||||
}
|
||||
else
|
||||
{
|
||||
outwardN = rec.normal;
|
||||
nint = 1.0f / mat.ri;
|
||||
cosine = -dot(rdir, rec.normal);
|
||||
}
|
||||
if (refract(rdir, outwardN, nint, refr))
|
||||
{
|
||||
reflProb = schlick(cosine, mat.ri);
|
||||
}
|
||||
else
|
||||
{
|
||||
reflProb = 1;
|
||||
}
|
||||
if (RandomFloat01(state) < reflProb)
|
||||
scattered = Ray(rec.pos, normalize(refl));
|
||||
else
|
||||
scattered = Ray(rec.pos, normalize(refr));
|
||||
}
|
||||
else
|
||||
{
|
||||
attenuation = float3(1,0,1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the colour carried back along ray `r`, recursing through bounces.
//   depth         - current bounce depth; no further scattering once it reaches kMaxDepth
//   inoutRayCount - incremented for this ray and for every ray traced below it
//   state         - random number generator state
//   doMaterialE   - when false (and DO_LIGHT_SAMPLING is on) the emission of the
//                   surface that was hit is dropped if the ray scatters onwards
static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state, bool doMaterialE = true)
{
    ZoneScoped;
    Hit hit;
    int sphereID = 0;
    ++inoutRayCount;

    if (!HitWorld(r, kMinT, kMaxT, hit, sphereID))
    {
        // sky
#if DO_MITSUBA_COMPARE
        return float3(0.15f,0.21f,0.3f); // easier compare with Mitsuba's constant environment light
#else
        float3 dir = r.dir;
        float blend = 0.5f*(dir.getY() + 1.0f);
        return ((1.0f-blend)*float3(1.0f, 1.0f, 1.0f) + blend*float3(0.5f, 0.7f, 1.0f)) * 0.3f;
#endif
    }

    Ray bounce;
    float3 atten;
    float3 directLight;
    const Material& mat = s_SphereMats[sphereID];
    float3 emitted = mat.emissive;

    // Path ends here: depth limit reached or the ray was absorbed.
    // NOTE(review): this early return adds the emission even when doMaterialE is false - confirm that is intended.
    if (depth >= kMaxDepth || !Scatter(mat, r, hit, atten, bounce, directLight, inoutRayCount, state))
        return emitted;

#if DO_LIGHT_SAMPLING
    if (!doMaterialE) emitted = float3(0,0,0); // don't add material emission if told so
    // For Lambert materials we just did explicit light (emissive) sampling and
    // already accounted for their contribution, so if the next ray bounce hits
    // the light again, don't add its emission.
    doMaterialE = (mat.type != Material::Lambert);
#endif
    return emitted + directLight + atten * Trace(bounce, depth+1, inoutRayCount, state, doMaterialE);
}
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
static enkiTaskScheduler* g_TS;
|
||||
#endif
|
||||
|
||||
void InitializeTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
g_TS = enkiNewTaskScheduler();
|
||||
enkiInitTaskSchedulerNumThreads(g_TS, std::max<int>( 2, std::thread::hardware_concurrency() - 2));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Counterpart of InitializeTest(): with CPU_CAN_DO_THREADS it deletes the
// global task scheduler g_TS; otherwise it does nothing besides the profiling zone.
void ShutdownTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiDeleteTaskScheduler(g_TS);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Arguments shared by every TraceRowJob invocation of one frame; filled in by DrawTest().
struct JobData
|
||||
{
|
||||
// Copied from DrawTest()'s `time` argument; TraceRowJob does not read it.
float time;
|
||||
// Frame index: seeds the per-row random state and sets the blend weight with the previous image.
int frameCount;
|
||||
int screenWidth, screenHeight;
|
||||
// Output image; TraceRowJob advances 4 floats per pixel and reads/writes the first 3.
float* backbuffer;
|
||||
Camera* cam;
|
||||
// Total rays traced; each job adds its local count once when it finishes.
std::atomic<int> rayCount;
|
||||
// Bitmask of TestFlags values.
unsigned testFlags;
|
||||
};
|
||||
|
||||
static void TraceRowJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData& data = *(JobData*)data_;
|
||||
float* backbuffer = data.backbuffer + start * data.screenWidth * 4;
|
||||
float invWidth = 1.0f / data.screenWidth;
|
||||
float invHeight = 1.0f / data.screenHeight;
|
||||
float lerpFac = float(data.frameCount) / float(data.frameCount+1);
|
||||
if (data.testFlags & kFlagAnimate)
|
||||
lerpFac *= DO_ANIMATE_SMOOTHING;
|
||||
if (!(data.testFlags & kFlagProgressive))
|
||||
lerpFac = 0;
|
||||
int rayCount = 0;
|
||||
for (uint32_t y = start; y < end; ++y)
|
||||
{
|
||||
uint32_t state = (y * 9781 + data.frameCount * 6271) | 1;
|
||||
for (int x = 0; x < data.screenWidth; ++x)
|
||||
{
|
||||
float3 col(0, 0, 0);
|
||||
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
|
||||
{
|
||||
float u = float(x + RandomFloat01(state)) * invWidth;
|
||||
float v = float(y + RandomFloat01(state)) * invHeight;
|
||||
Ray r = data.cam->GetRay(u, v, state);
|
||||
col += Trace(r, 0, rayCount, state);
|
||||
}
|
||||
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
|
||||
|
||||
float3 prev(backbuffer[0], backbuffer[1], backbuffer[2]);
|
||||
col = prev * lerpFac + col * (1-lerpFac);
|
||||
col.store(backbuffer);
|
||||
backbuffer += 4;
|
||||
}
|
||||
}
|
||||
data.rayCount += rayCount;
|
||||
}
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
if (testFlags & kFlagAnimate)
|
||||
{
|
||||
s_Spheres[1].center.setY(cosf(time) + 1.0f);
|
||||
s_Spheres[8].center.setZ(sinf(time)*0.3f);
|
||||
}
|
||||
float3 lookfrom(0, 2, 3);
|
||||
float3 lookat(0, 0, 0);
|
||||
float distToFocus = 3;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
float aperture = 0.0f;
|
||||
#else
|
||||
float aperture = 0.1f;
|
||||
#endif
|
||||
#if DO_BIG_SCENE
|
||||
aperture *= 0.2f;
|
||||
#endif
|
||||
|
||||
s_EmissiveSphereCount = 0;
|
||||
for (int i = 0; i < kSphereCount; ++i)
|
||||
{
|
||||
Sphere& s = s_Spheres[i];
|
||||
s.UpdateDerivedData();
|
||||
s_SpheresSoA.centerX[i] = s.center.getX();
|
||||
s_SpheresSoA.centerY[i] = s.center.getY();
|
||||
s_SpheresSoA.centerZ[i] = s.center.getZ();
|
||||
s_SpheresSoA.sqRadius[i] = s.radius * s.radius;
|
||||
s_SpheresSoA.invRadius[i] = s.invRadius;
|
||||
|
||||
// Remember IDs of emissive spheres (light sources)
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (smat.emissive.getX() > 0 || smat.emissive.getY() > 0 || smat.emissive.getZ() > 0)
|
||||
{
|
||||
s_EmissiveSpheres[s_EmissiveSphereCount] = i;
|
||||
s_EmissiveSphereCount++;
|
||||
}
|
||||
}
|
||||
|
||||
s_Cam = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
|
||||
}
|
||||
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData args;
|
||||
args.time = time;
|
||||
args.frameCount = frameCount;
|
||||
args.screenWidth = screenWidth;
|
||||
args.screenHeight = screenHeight;
|
||||
args.backbuffer = backbuffer;
|
||||
args.cam = &s_Cam;
|
||||
args.testFlags = testFlags;
|
||||
args.rayCount = 0;
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
|
||||
bool threaded = true;
|
||||
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
|
||||
enkiWaitForTaskSet(g_TS, task);
|
||||
enkiDeleteTaskSet(task);
|
||||
#else
|
||||
TraceRowJob(0, screenHeight, 0, &args);
|
||||
#endif
|
||||
|
||||
outRayCount = args.rayCount;
|
||||
}
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize)
|
||||
{
|
||||
ZoneScoped;
|
||||
outCount = kSphereCount;
|
||||
outObjectSize = sizeof(Sphere);
|
||||
outMaterialSize = sizeof(Material);
|
||||
outCamSize = sizeof(Camera);
|
||||
}
|
||||
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount)
|
||||
{
|
||||
ZoneScoped;
|
||||
memcpy(outObjects, s_Spheres, kSphereCount * sizeof(s_Spheres[0]));
|
||||
memcpy(outMaterials, s_SphereMats, kSphereCount * sizeof(s_SphereMats[0]));
|
||||
memcpy(outCam, &s_Cam, sizeof(s_Cam));
|
||||
memcpy(outEmissives, s_EmissiveSpheres, s_EmissiveSphereCount * sizeof(s_EmissiveSpheres[0]));
|
||||
*outEmissiveCount = s_EmissiveSphereCount;
|
||||
}
|
||||
17
examples/ToyPathTracer/Source/Test.h
Normal file
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
// Bit flags passed as `testFlags` to UpdateTest() and DrawTest().
enum TestFlags
|
||||
{
|
||||
// Animate the scene: UpdateTest() moves two spheres over time.
kFlagAnimate = (1 << 0),
|
||||
// Blend each new frame with the previous backbuffer contents; when clear, every frame replaces it.
kFlagProgressive = (1 << 1),
|
||||
};
|
||||
|
||||
void InitializeTest();
|
||||
void ShutdownTest();
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags);
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags);
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize);
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount);
|
||||
79
examples/ToyPathTracer/Source/enkiTS/Atomics.h
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
#undef GetObject
|
||||
#include <intrin.h>
|
||||
|
||||
extern "C" void _ReadWriteBarrier();
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#pragma intrinsic(_InterlockedCompareExchange)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd)
|
||||
|
||||
// Memory barriers to prevent compiler re-ordering. Note: both definitions below are compiler-only barriers and emit no CPU fence instruction.
|
||||
#define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
|
||||
#define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
|
||||
#define BASE_ALIGN(x) __declspec( align( x ) )
|
||||
|
||||
#else
|
||||
#define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
|
||||
#define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
|
||||
#define BASE_ALIGN(x) __attribute__ ((aligned( x )))
|
||||
#endif
|
||||
|
||||
namespace enki
|
||||
{
|
||||
// Atomic 32-bit compare-and-swap: if( *pDest == compareWith ) { *pDest = swapTo; }
// Always returns the value *pDest held before the call, so a successful swap
// returns compareWith.
inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
{
#ifdef _WIN32
    // assumes two's complement - unsigned / signed conversion leads to same bit pattern
    const long previous = _InterlockedCompareExchange( (volatile long*)pDest, swapTo, compareWith );
    return previous;
#else
    // note the GCC builtin takes ( ptr, expected, desired )
    const uint32_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
    return previous;
#endif
}
|
||||
|
||||
// 64-bit overload: if( *pDest == compareWith ) { *pDest = swapTo; }
// Returns the value *pDest held before the call.
inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
{
#ifdef _WIN32
    // assumes two's complement - unsigned / signed conversion leads to same bit pattern
    const __int64 previous = _InterlockedCompareExchange64( (__int64 volatile*)pDest, swapTo, compareWith );
    return previous;
#else
    // note the GCC builtin takes ( ptr, expected, desired )
    const uint64_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
    return previous;
#endif
}
|
||||
|
||||
// Atomic fetch-and-add: tmp = *pDest; *pDest += value; return tmp;
// i.e. the return value is what *pDest held BEFORE the addition.
inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
{
#ifdef _WIN32
    const long before = _InterlockedExchangeAdd( (volatile long*)pDest, value );
    return before;
#else
    const int32_t before = __sync_fetch_and_add( pDest, value );
    return before;
#endif
}
|
||||
|
||||
}
|
||||
240
examples/ToyPathTracer/Source/enkiTS/LockLessMultiReadPipe.h
Normal file
@@ -0,0 +1,240 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "Atomics.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace enki
|
||||
{
|
||||
// LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
|
||||
// Readers can only read from the back of the pipe
|
||||
// The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
|
||||
// for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
|
||||
// Note: using log2 sizes so we do not need to clamp (multi-operation)
|
||||
// T is the contained type
|
||||
// Note: this is not truly lockless, as the flags are used as a form of lock state.
|
||||
template<uint8_t cSizeLog2, typename T> class LockLessMultiReadPipe
|
||||
{
|
||||
public:
|
||||
LockLessMultiReadPipe();
|
||||
~LockLessMultiReadPipe() {}
|
||||
|
||||
// ReaderTryReadBack returns false if we were unable to read
|
||||
// This is thread safe for both multiple readers and the writer
|
||||
bool ReaderTryReadBack( T* pOut );
|
||||
|
||||
// WriterTryReadFront returns false if we were unable to read
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryReadFront( T* pOut );
|
||||
|
||||
// WriterTryWriteFront returns false if we were unable to write
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryWriteFront( const T& in );
|
||||
|
||||
// IsPipeEmpty() is a utility function, not intended for general use
|
||||
// Should only be used very prudently.
|
||||
bool IsPipeEmpty() const
|
||||
{
|
||||
return 0 == m_WriteIndex - m_ReadCount;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
m_WriteIndex = 0;
|
||||
m_ReadIndex = 0;
|
||||
m_ReadCount = 0;
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
private:
|
||||
const static uint32_t ms_cSize = ( 1 << cSizeLog2 );
|
||||
const static uint32_t ms_cIndexMask = ms_cSize - 1;
|
||||
const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS
|
||||
|
||||
T m_Buffer[ ms_cSize ];
|
||||
|
||||
// read and write indexes allow fast access to the pipe, but actual access
|
||||
// controlled by the access flags.
|
||||
volatile uint32_t BASE_ALIGN(4) m_WriteIndex;
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadCount;
|
||||
volatile uint32_t m_Flags[ ms_cSize ];
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadIndex;
|
||||
};
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
LockLessMultiReadPipe<cSizeLog2,T>::LockLessMultiReadPipe()
|
||||
: m_WriteIndex(0)
|
||||
, m_ReadIndex(0)
|
||||
, m_ReadCount(0)
|
||||
{
|
||||
assert( cSizeLog2 < 32 );
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
// Tries to take one item from the back of the pipe into *pOut.
// Scans forward from the current read count for a slot whose flag is
// FLAG_CAN_READ and claims it with a compare-and-swap.
// Returns false when the pipe is observed empty, true once an item has been copied out.
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::ReaderTryReadBack( T* pOut )
|
||||
{
|
||||
|
||||
uint32_t actualReadIndex;
|
||||
|
||||
uint32_t readCount = m_ReadCount;
|
||||
|
||||
// We get hold of read index for consistency,
|
||||
// and do first pass starting at read count
|
||||
uint32_t readIndexToUse = readCount;
|
||||
|
||||
|
||||
while(true)
|
||||
{
|
||||
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// the scan ran past the newest item: restart from the stored read index
|
||||
if( readIndexToUse >= writeIndex )
|
||||
{
|
||||
// move back to start
|
||||
readIndexToUse = m_ReadIndex;
|
||||
}
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
actualReadIndex = readIndexToUse & ms_cIndexMask;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange
// (swaps the flag to FLAG_INVALID only if it currently is FLAG_CAN_READ)
|
||||
uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
// the slot is ours if the flag was FLAG_CAN_READ before the swap
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
// slot not readable: try the next index
|
||||
++readIndexToUse;
|
||||
|
||||
//update known readcount
|
||||
readCount = m_ReadCount;
|
||||
}
|
||||
|
||||
// we update the read count using an atomic add, as we've only read one piece of data.
|
||||
// this ensures consistency of the read count, and the above loop ensures readers
|
||||
// only read from unread data
|
||||
AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
|
||||
|
||||
BASE_MEMORYBARRIER_ACQUIRE();
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
// hand the slot back to the writer
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tries to take the most recently written item (the front of the pipe) into *pOut.
// Walks backwards from the write index looking for a slot it can claim with a
// compare-and-swap. On success the write index is decremented by one.
// Returns false when there is nothing left that the writer can take.
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryReadFront( T* pOut )
|
||||
{
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
uint32_t frontReadIndex = writeIndex;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
|
||||
uint32_t previous = FLAG_INVALID;
|
||||
uint32_t actualReadIndex = 0;
|
||||
while( true )
|
||||
{
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t readCount = m_ReadCount;
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe || 0 == frontReadIndex )
|
||||
{
|
||||
// frontReadIndex can get to 0 here if that item was just being read by another thread.
|
||||
// publish the current read count as the index readers restart their scan from
m_ReadIndex = readCount;
|
||||
return false;
|
||||
}
|
||||
// step back one item from the front
--frontReadIndex;
|
||||
actualReadIndex = frontReadIndex & ms_cIndexMask;
|
||||
// claim the slot: flag becomes FLAG_INVALID only if it currently is FLAG_CAN_READ
previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
// slot was not readable and we have walked back to the stored read index: give up
else if( m_ReadIndex >= frontReadIndex )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
// the slot may be written again
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and writer owns the write index
|
||||
// we only move one back as this is as many as we have read, not where we have read from.
|
||||
--m_WriteIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Tries to append `in` at the front of the pipe.
// Returns false when the slot at the write index is not in the FLAG_CAN_WRITE
// state, true once the item is stored and the write index has been advanced.
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryWriteFront( const T& in )
|
||||
{
|
||||
// The writer 'owns' the write index, and readers can only reduce
|
||||
// the amount of data in the pipe.
|
||||
// We get hold of both values for consistency and to reduce false sharing
|
||||
// impacting more than one access
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
|
||||
|
||||
// a reader may still be reading this item, as there are multiple readers
|
||||
if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
|
||||
{
|
||||
return false; // still being read, so have caught up with tail.
|
||||
}
|
||||
|
||||
|
||||
// as we are the only writer we can update the data without atomics
|
||||
// whilst the write index has not been updated
|
||||
m_Buffer[ actualWriteIndex ] = in;
|
||||
// mark the slot as holding a readable item
m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
|
||||
|
||||
// We need to ensure the above writes occur prior to updating the write index,
|
||||
// otherwise another thread might read before it's finished
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and the writer controls the write index
|
||||
++writeIndex;
|
||||
m_WriteIndex = writeIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
437
examples/ToyPathTracer/Source/enkiTS/TaskScheduler.cpp
Normal file
@@ -0,0 +1,437 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "TaskScheduler.h"
|
||||
#include "LockLessMultiReadPipe.h"
|
||||
|
||||
|
||||
|
||||
using namespace enki;
|
||||
|
||||
|
||||
static const uint32_t PIPESIZE_LOG2 = 8;
|
||||
static const uint32_t SPIN_COUNT = 100;
|
||||
static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10;
|
||||
static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8;
|
||||
|
||||
// each software thread gets its own copy of gtl_threadNum, so this is safe to use as a static variable
|
||||
static THREAD_LOCAL uint32_t gtl_threadNum = 0;
|
||||
|
||||
// Scheduler-internal helper types.
namespace enki
|
||||
{
|
||||
// One unit of queued work: a task plus the sub-range of it to run.
struct SubTaskSet
|
||||
{
|
||||
ITaskSet* pTask;
|
||||
// start/end of the range this entry covers; SplitTask() carves pieces off its start
TaskSetPartition partition;
|
||||
};
|
||||
|
||||
// we derive class TaskPipe rather than typedef to get forward declaration working easily
|
||||
// (the pipe holds 1 << PIPESIZE_LOG2 SubTaskSet slots)
class TaskPipe : public LockLessMultiReadPipe<PIPESIZE_LOG2,enki::SubTaskSet> {};
|
||||
|
||||
// Start-up arguments read by TaskScheduler::TaskingThreadFunction.
struct ThreadArgs
|
||||
{
|
||||
// number stored into the thread-local gtl_threadNum by the thread function
uint32_t threadNum;
|
||||
TaskScheduler* pTaskScheduler;
|
||||
};
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Carves up to rangeToSplit_ items off the start of subTask_'s partition.
// Returns the carved-off piece; subTask_ keeps the remainder (its start is
// moved to where the returned piece ends). If fewer than rangeToSplit_ items
// are left, the whole remainder is returned and subTask_ becomes empty.
SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ )
{
    const uint32_t remaining = subTask_.partition.end - subTask_.partition.start;
    const uint32_t take = ( rangeToSplit_ > remaining ) ? remaining : rangeToSplit_;

    SubTaskSet front = subTask_;
    front.partition.end = subTask_.partition.start + take;
    subTask_.partition.start = front.partition.end;
    return front;
}
|
||||
|
||||
// Pause(): spin-wait hint used by the worker threads' back-off loop.
// On x86/x64 it issues the PAUSE instruction; everywhere else it is a no-op.
#if defined _WIN32
    #if defined _M_IX86 || defined _M_X64
        #pragma intrinsic(_mm_pause)
        inline void Pause() { _mm_pause(); }
    #else
        // Windows on a non-x86 target (e.g. ARM): without this fallback no
        // Pause() would be defined and the callers would fail to compile.
        inline void Pause() { ;} // may have NOP or yield equiv
    #endif
#elif defined __i386__ || defined __x86_64__
    inline void Pause() { __asm__ __volatile__("pause;"); }
#else
    inline void Pause() { ;} // may have NOP or yield equiv
#endif
|
||||
}
|
||||
|
||||
|
||||
// Invokes the profiler callback func_ with threadnum_; a null func_ is ignored.
static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_)
{
    if( !func_ )
    {
        return;
    }
    func_(threadnum_);
}
|
||||
|
||||
// Returns a pointer to the scheduler's own callback table so the caller can
// fill in the entries it is interested in.
ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks()
{
    ProfilerCallbacks* pCallbacks = &m_ProfilerCallbacks;
    return pCallbacks;
}
|
||||
|
||||
// Entry point of every worker thread created by StartThreads().
// pArgs points at the ThreadArgs slot for this thread. The thread records its
// number in gtl_threadNum, then runs tasks until m_bRunning is cleared.
// When no task is found it backs off with a growing number of Pause() calls
// and, after more than SPIN_COUNT consecutive misses, blocks in WaitForTasks().
THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs )
{
    const ThreadArgs     args      = *(ThreadArgs*)pArgs;
    const uint32_t       threadNum = args.threadNum;
    TaskScheduler* const pTS       = args.pTaskScheduler;
    gtl_threadNum = threadNum;

    SafeCallback( pTS->m_ProfilerCallbacks.threadStart, threadNum );

    uint32_t idleSpins          = 0;
    uint32_t hintPipeToCheck_io = threadNum + 1;    // does not need to be clamped.
    while( pTS->m_bRunning )
    {
        if( pTS->TryRunTask( threadNum, hintPipeToCheck_io ) )
        {
            // ran something: restart the back-off sequence
            idleSpins = 0;
            continue;
        }

        // no tasks, will spin then wait
        ++idleSpins;
        if( idleSpins > SPIN_COUNT )
        {
            pTS->WaitForTasks( threadNum );
            idleSpins = 0;
            continue;
        }

        for( uint32_t pauses = idleSpins * SPIN_BACKOFF_MULTIPLIER; pauses != 0; --pauses )
        {
            Pause();
        }
    }

    AtomicAdd( &pTS->m_NumThreadsRunning, -1 );
    SafeCallback( pTS->m_ProfilerCallbacks.threadStop, threadNum );

    return 0;
}
|
||||
|
||||
|
||||
void TaskScheduler::StartThreads()
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_bRunning = true;
|
||||
|
||||
SemaphoreCreate( m_NewTaskSemaphore );
|
||||
|
||||
// we create one less thread than m_NumThreads as the main thread counts as one
|
||||
m_pThreadNumStore = new ThreadArgs[m_NumThreads];
|
||||
m_pThreadIDs = new threadid_t[m_NumThreads];
|
||||
m_pThreadNumStore[0].threadNum = 0;
|
||||
m_pThreadNumStore[0].pTaskScheduler = this;
|
||||
m_pThreadIDs[0] = 0;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 1;// acount for main thread
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
m_pThreadNumStore[thread].threadNum = thread;
|
||||
m_pThreadNumStore[thread].pTaskScheduler = this;
|
||||
ThreadCreate( &m_pThreadIDs[thread], TaskingThreadFunction, &m_pThreadNumStore[thread] );
|
||||
++m_NumThreadsRunning;
|
||||
}
|
||||
|
||||
// ensure we have sufficient tasks to equally fill either all threads including main
|
||||
// or just the threads we've launched, this is outside the firstinit as we want to be able
|
||||
// to runtime change it
|
||||
if( 1 == m_NumThreads )
|
||||
{
|
||||
m_NumPartitions = 1;
|
||||
m_NumInitialPartitions = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_NumPartitions = m_NumThreads * (m_NumThreads - 1);
|
||||
m_NumInitialPartitions = m_NumThreads - 1;
|
||||
if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS )
|
||||
{
|
||||
m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS;
|
||||
}
|
||||
}
|
||||
|
||||
m_bHaveThreads = true;
|
||||
}
|
||||
|
||||
void TaskScheduler::StopThreads( bool bWait_ )
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
// wait for them threads quit before deleting data
|
||||
m_bRunning = false;
|
||||
while( bWait_ && m_NumThreadsRunning > 1 )
|
||||
{
|
||||
// keep firing event to ensure all threads pick up state of m_bRunning
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning );
|
||||
}
|
||||
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
ThreadTerminate( m_pThreadIDs[thread] );
|
||||
}
|
||||
|
||||
m_NumThreads = 0;
|
||||
delete[] m_pThreadNumStore;
|
||||
delete[] m_pThreadIDs;
|
||||
m_pThreadNumStore = 0;
|
||||
m_pThreadIDs = 0;
|
||||
SemaphoreClose( m_NewTaskSemaphore );
|
||||
|
||||
m_bHaveThreads = false;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ )
|
||||
{
|
||||
// check for tasks
|
||||
SubTaskSet subTask;
|
||||
bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask );
|
||||
|
||||
uint32_t threadToCheck = hintPipeToCheck_io_;
|
||||
uint32_t checkCount = 0;
|
||||
while( !bHaveTask && checkCount < m_NumThreads )
|
||||
{
|
||||
threadToCheck = ( hintPipeToCheck_io_ + checkCount ) % m_NumThreads;
|
||||
if( threadToCheck != threadNum )
|
||||
{
|
||||
bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask );
|
||||
}
|
||||
++checkCount;
|
||||
}
|
||||
|
||||
if( bHaveTask )
|
||||
{
|
||||
// update hint, will preserve value unless actually got task from another thread.
|
||||
hintPipeToCheck_io_ = threadToCheck;
|
||||
|
||||
uint32_t partitionSize = subTask.partition.end - subTask.partition.start;
|
||||
if( subTask.pTask->m_RangeToRun < partitionSize )
|
||||
{
|
||||
SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun );
|
||||
SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 );
|
||||
taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum );
|
||||
AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// the task has already been divided up by AddTaskSetToPipe, so just run it
|
||||
subTask.pTask->ExecuteRange( subTask.partition, threadNum );
|
||||
AtomicAdd( &subTask.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
}
|
||||
|
||||
return bHaveTask;
|
||||
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitForTasks( uint32_t threadNum )
|
||||
{
|
||||
// We incrememt the number of threads waiting here in order
|
||||
// to ensure that the check for tasks occurs after the increment
|
||||
// to prevent a task being added after a check, then the thread waiting.
|
||||
// This will occasionally result in threads being mistakenly awoken,
|
||||
// but they will then go back to sleep.
|
||||
AtomicAdd( &m_NumThreadsWaiting, 1 );
|
||||
|
||||
bool bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !bHaveTasks )
|
||||
{
|
||||
SafeCallback( m_ProfilerCallbacks.waitStart, threadNum );
|
||||
SemaphoreWait( m_NewTaskSemaphore );
|
||||
SafeCallback( m_ProfilerCallbacks.waitStop, threadNum );
|
||||
}
|
||||
|
||||
int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 );
|
||||
assert( prev != 0 );
|
||||
}
|
||||
|
||||
void TaskScheduler::WakeThreads()
|
||||
{
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting );
|
||||
}
|
||||
|
||||
void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ )
|
||||
{
|
||||
int32_t numAdded = 0;
|
||||
while( subTask_.partition.start != subTask_.partition.end )
|
||||
{
|
||||
SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ );
|
||||
|
||||
// add the partition to the pipe
|
||||
++numAdded;
|
||||
if( !m_pPipesPerThread[ gtl_threadNum ].WriterTryWriteFront( taskToAdd ) )
|
||||
{
|
||||
if( numAdded > 1 )
|
||||
{
|
||||
WakeThreads();
|
||||
}
|
||||
// alter range to run the appropriate fraction
|
||||
if( taskToAdd.pTask->m_RangeToRun < rangeToSplit_ )
|
||||
{
|
||||
taskToAdd.partition.end = taskToAdd.partition.start + taskToAdd.pTask->m_RangeToRun;
|
||||
subTask_.partition.start = taskToAdd.partition.end;
|
||||
}
|
||||
taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ );
|
||||
--numAdded;
|
||||
}
|
||||
}
|
||||
|
||||
// increment running count by number added
|
||||
AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ );
|
||||
|
||||
WakeThreads();
|
||||
}
|
||||
|
||||
// Schedules the whole of pTaskSet: computes its per-run range and the initial
// split size (both at least m_MinRange), then hands the full [0, m_SetSize)
// range to SplitAndAddTask.
void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet )
{
    // set running count to -1 to guarantee it won't be found complete until all subtasks added
    pTaskSet->m_RunningCount = -1;

    // divide task up and add to pipe
    uint32_t rangeToRun = pTaskSet->m_SetSize / m_NumPartitions;
    if( rangeToRun < pTaskSet->m_MinRange )
    {
        rangeToRun = pTaskSet->m_MinRange;
    }
    pTaskSet->m_RangeToRun = rangeToRun;

    uint32_t rangeToSplit = pTaskSet->m_SetSize / m_NumInitialPartitions;
    if( rangeToSplit < pTaskSet->m_MinRange )
    {
        rangeToSplit = pTaskSet->m_MinRange;
    }

    SubTaskSet wholeSet;
    wholeSet.pTask           = pTaskSet;
    wholeSet.partition.start = 0;
    wholeSet.partition.end   = pTaskSet->m_SetSize;

    // the offset of 1 cancels the -1 the running count was set to above
    SplitAndAddTask( gtl_threadNum, wholeSet, rangeToSplit, 1 );
}
|
||||
|
||||
void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet )
|
||||
{
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
if( pTaskSet )
|
||||
{
|
||||
while( pTaskSet->m_RunningCount )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
// should add a spin then wait for task completion event.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAll()
|
||||
{
|
||||
bool bHaveTasks = true;
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
int32_t threadsRunning = m_NumThreadsRunning - 1;
|
||||
while( bHaveTasks || m_NumThreadsWaiting < threadsRunning )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAllAndShutdown()
|
||||
{
|
||||
WaitforAll();
|
||||
StopThreads(true);
|
||||
delete[] m_pPipesPerThread;
|
||||
m_pPipesPerThread = 0;
|
||||
}
|
||||
|
||||
uint32_t TaskScheduler::GetNumTaskThreads() const
|
||||
{
|
||||
return m_NumThreads;
|
||||
}
|
||||
|
||||
// Constructs an idle scheduler: no pipes, no threads, all counters zero and
// every profiler callback null. Initialize() must be called before use.
TaskScheduler::TaskScheduler()
    : m_pPipesPerThread(NULL)
    , m_NumThreads(0)
    , m_pThreadNumStore(NULL)
    , m_pThreadIDs(NULL)
    , m_bRunning(false)
    , m_NumThreadsRunning(0)
    , m_NumThreadsWaiting(0)
    , m_NumPartitions(0)
    , m_NumInitialPartitions(0)     // was previously left uninitialised
    , m_bHaveThreads(false)
{
    memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks));
}
|
||||
|
||||
// Stops the worker threads (waiting for them) and releases the pipes.
TaskScheduler::~TaskScheduler()
{
    const bool bWaitForThreads = true;
    StopThreads( bWaitForThreads );

    delete[] m_pPipesPerThread;
    m_pPipesPerThread = 0;
}
|
||||
|
||||
void TaskScheduler::Initialize( uint32_t numThreads_ )
|
||||
{
|
||||
assert( numThreads_ );
|
||||
StopThreads( true ); // Stops threads, waiting for them.
|
||||
delete[] m_pPipesPerThread;
|
||||
|
||||
m_NumThreads = numThreads_;
|
||||
|
||||
m_pPipesPerThread = new TaskPipe[ m_NumThreads ];
|
||||
|
||||
StartThreads();
|
||||
}
|
||||
|
||||
void TaskScheduler::Initialize()
|
||||
{
|
||||
Initialize( GetNumHardwareThreads() );
|
||||
}
|
||||
177
examples/ToyPathTracer/Source/enkiTS/TaskScheduler.h
Normal file
@@ -0,0 +1,177 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "Threads.h"
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
struct TaskSetPartition
|
||||
{
|
||||
uint32_t start;
|
||||
uint32_t end;
|
||||
};
|
||||
|
||||
class TaskScheduler;
|
||||
class TaskPipe;
|
||||
struct ThreadArgs;
|
||||
struct SubTaskSet;
|
||||
|
||||
// Subclass ITaskSet to create tasks.
|
||||
// TaskSets can be re-used, but check
|
||||
class ITaskSet
|
||||
{
|
||||
public:
|
||||
ITaskSet()
|
||||
: m_SetSize(1)
|
||||
, m_MinRange(1)
|
||||
, m_RunningCount(0)
|
||||
, m_RangeToRun(1)
|
||||
{}
|
||||
|
||||
ITaskSet( uint32_t setSize_ )
|
||||
: m_SetSize( setSize_ )
|
||||
, m_MinRange(1)
|
||||
, m_RunningCount(0)
|
||||
, m_RangeToRun(1)
|
||||
{}
|
||||
|
||||
ITaskSet( uint32_t setSize_, uint32_t minRange_ )
|
||||
: m_SetSize( setSize_ )
|
||||
, m_MinRange( minRange_ )
|
||||
, m_RunningCount(0)
|
||||
, m_RangeToRun(minRange_)
|
||||
{}
|
||||
|
||||
// Execute range should be overloaded to process tasks. It will be called with a
|
||||
// range_ where range.start >= 0; range.start < range.end; and range.end < m_SetSize;
|
||||
// The range values should be mapped so that linearly processing them in order is cache friendly
|
||||
// i.e. neighbouring values should be close together.
|
||||
// threadnum should not be used for changing processing of data, it's intended purpose
|
||||
// is to allow per-thread data buckets for output.
|
||||
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0;
|
||||
|
||||
// Size of set - usually the number of data items to be processed, see ExecuteRange. Defaults to 1
|
||||
uint32_t m_SetSize;
|
||||
|
||||
// Minimum size of of TaskSetPartition range when splitting a task set into partitions.
|
||||
// This should be set to a value which results in computation effort of at least 10k
|
||||
// clock cycles to minimize tast scheduler overhead.
|
||||
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
|
||||
// of m_MinRange.
|
||||
// Also known as grain size in literature.
|
||||
uint32_t m_MinRange;
|
||||
|
||||
bool GetIsComplete()
|
||||
{
|
||||
return 0 == m_RunningCount;
|
||||
}
|
||||
private:
|
||||
friend class TaskScheduler;
|
||||
volatile int32_t m_RunningCount;
|
||||
uint32_t m_RangeToRun;
|
||||
};
|
||||
|
||||
// TaskScheduler implements several callbacks intended for profilers
|
||||
typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ );
|
||||
struct ProfilerCallbacks
|
||||
{
|
||||
ProfilerCallbackFunc threadStart;
|
||||
ProfilerCallbackFunc threadStop;
|
||||
ProfilerCallbackFunc waitStart;
|
||||
ProfilerCallbackFunc waitStop;
|
||||
};
|
||||
|
||||
class TaskScheduler
|
||||
{
|
||||
public:
|
||||
TaskScheduler();
|
||||
~TaskScheduler();
|
||||
|
||||
// Call either Initialize() or Initialize( numThreads_ ) before adding tasks.
|
||||
|
||||
// Initialize() will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void Initialize();
|
||||
|
||||
// Initialize( numThreads_ ) - numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void Initialize( uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Adds the TaskSet to pipe and returns if the pipe is not full.
|
||||
// If the pipe is full, pTaskSet is run.
|
||||
// should only be called from main thread, or within a task
|
||||
void AddTaskSetToPipe( ITaskSet* pTaskSet );
|
||||
|
||||
// Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete();
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void WaitforTaskSet( const ITaskSet* pTaskSet );
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAll();
|
||||
|
||||
// Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAllAndShutdown();
|
||||
|
||||
// Returns the number of threads created for running tasks + 1
|
||||
// to account for the main thread.
|
||||
uint32_t GetNumTaskThreads() const;
|
||||
|
||||
// Returns the ProfilerCallbacks structure so that it can be modified to
|
||||
// set the callbacks.
|
||||
ProfilerCallbacks* GetProfilerCallbacks();
|
||||
|
||||
private:
|
||||
static THREADFUNC_DECL TaskingThreadFunction( void* pArgs );
|
||||
void WaitForTasks( uint32_t threadNum );
|
||||
bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ );
|
||||
void StartThreads();
|
||||
void StopThreads( bool bWait_ );
|
||||
void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ );
|
||||
void WakeThreads();
|
||||
|
||||
TaskPipe* m_pPipesPerThread;
|
||||
|
||||
uint32_t m_NumThreads;
|
||||
ThreadArgs* m_pThreadNumStore;
|
||||
threadid_t* m_pThreadIDs;
|
||||
volatile bool m_bRunning;
|
||||
volatile int32_t m_NumThreadsRunning;
|
||||
volatile int32_t m_NumThreadsWaiting;
|
||||
uint32_t m_NumPartitions;
|
||||
uint32_t m_NumInitialPartitions;
|
||||
semaphoreid_t m_NewTaskSemaphore;
|
||||
bool m_bHaveThreads;
|
||||
ProfilerCallbacks m_ProfilerCallbacks;
|
||||
|
||||
TaskScheduler( const TaskScheduler& nocopy );
|
||||
TaskScheduler& operator=( const TaskScheduler& nocopy );
|
||||
};
|
||||
|
||||
}
|
||||
122
examples/ToyPathTracer/Source/enkiTS/TaskScheduler_c.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include "TaskScheduler_c.h"
|
||||
#include "TaskScheduler.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
using namespace enki;
|
||||
|
||||
struct enkiTaskScheduler : TaskScheduler
|
||||
{
|
||||
};
|
||||
|
||||
struct enkiTaskSet : ITaskSet
|
||||
{
|
||||
enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {}
|
||||
|
||||
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
|
||||
{
|
||||
taskFun( range.start, range.end, threadnum, pArgs );
|
||||
}
|
||||
|
||||
enkiTaskExecuteRange taskFun;
|
||||
void* pArgs;
|
||||
};
|
||||
|
||||
// Allocates a new, uninitialised task scheduler with operator new.
enkiTaskScheduler* enkiNewTaskScheduler()
{
    return new enkiTaskScheduler();
}
|
||||
|
||||
// C wrapper for TaskScheduler::Initialize().
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ )
{
    TaskScheduler* pScheduler = pETS_;
    pScheduler->Initialize();
}
|
||||
|
||||
// C wrapper for TaskScheduler::Initialize( numThreads_ ).
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ )
{
    TaskScheduler* pScheduler = pETS_;
    pScheduler->Initialize( numThreads_ );
}
|
||||
|
||||
// Destroys a scheduler created with enkiNewTaskScheduler().
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler* pDoomed = pETS_;
    delete pDoomed;
}
|
||||
|
||||
// Allocates a task set that will call taskFunc_ for each range it is given.
// pETS_ is accepted for API symmetry but is not used here.
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ )
{
    enkiTaskSet* pTaskSet = new enkiTaskSet( taskFunc_ );
    return pTaskSet;
}
|
||||
|
||||
// Destroys a task set created with enkiCreateTaskSet().
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ )
{
    enkiTaskSet* pDoomed = pTaskSet_;
    delete pDoomed;
}
|
||||
|
||||
// Stores the user argument and set size on the task set, then schedules it.
// The task set's minimum range is left at whatever value it currently holds.
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ )
{
    assert( pTaskSet_ );
    assert( pTaskSet_->taskFun );

    enkiTaskSet& taskSet = *pTaskSet_;
    taskSet.m_SetSize = setSize_;
    taskSet.pArgs     = pArgs_;
    pETS_->AddTaskSetToPipe( pTaskSet_ );
}
|
||||
|
||||
// Same as enkiAddTaskSetToPipe, but also sets the task set's minimum range
// (grain size) before scheduling it.
void enkiAddTaskSetToPipeMinRange(enkiTaskScheduler * pETS_, enkiTaskSet * pTaskSet_, void * pArgs_, uint32_t setSize_, uint32_t minRange_)
{
    assert( pTaskSet_ );
    assert( pTaskSet_->taskFun );

    enkiTaskSet& taskSet = *pTaskSet_;
    taskSet.m_SetSize  = setSize_;
    taskSet.m_MinRange = minRange_;
    taskSet.pArgs      = pArgs_;
    pETS_->AddTaskSetToPipe( pTaskSet_ );
}
|
||||
|
||||
// Returns 1 if the task set reports completion, 0 otherwise. Does not wait.
// pETS_ is not used.
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    assert( pTaskSet_ );
    if( pTaskSet_->GetIsComplete() )
    {
        return 1;
    }
    return 0;
}
|
||||
|
||||
// C wrapper for TaskScheduler::WaitforTaskSet().
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    TaskScheduler* pScheduler = pETS_;
    pScheduler->WaitforTaskSet( pTaskSet_ );
}
|
||||
|
||||
// C wrapper for TaskScheduler::WaitforAll().
void enkiWaitForAll( enkiTaskScheduler* pETS_ )
{
    TaskScheduler* pScheduler = pETS_;
    pScheduler->WaitforAll();
}
|
||||
|
||||
|
||||
// C wrapper for TaskScheduler::GetNumTaskThreads().
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ )
{
    const uint32_t numThreads = pETS_->GetNumTaskThreads();
    return numThreads;
}
|
||||
|
||||
// Returns the scheduler's profiler callback table viewed as the C struct.
// The cast relies on the C and C++ callback structs having the same size,
// which is asserted here.
enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ )
{
    assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) );
    enki::ProfilerCallbacks* pCallbacks = pETS_->GetProfilerCallbacks();
    return (enkiProfilerCallbacks*)pCallbacks;
}
|
||||
|
||||
104
examples/ToyPathTracer/Source/enkiTS/TaskScheduler_c.h
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Opaque handles; the definitions live in the C++ implementation file. */
typedef struct enkiTaskScheduler enkiTaskScheduler;
typedef struct enkiTaskSet       enkiTaskSet;

/* Task callback: processes the items in the range start_ .. end of a task set.
   threadnum_ is the number of the thread running the callback and pArgs_ is
   the pointer given when the task set was added. */
typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );


// Create a new task scheduler
// (void) makes this a real prototype when the header is compiled as C.
enkiTaskScheduler*  enkiNewTaskScheduler( void );

// Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
// sufficient to fill the system when including the main thread.
// Initialize can be called multiple times - it will wait for completion
// before re-initializing.
void                enkiInitTaskScheduler(  enkiTaskScheduler* pETS_ );

// Initialize a task scheduler with numThreads_ (must be > 0)
// will create numThreads_-1 threads, as thread 0 is
// the thread on which the initialize was called.
void                enkiInitTaskSchedulerNumThreads(  enkiTaskScheduler* pETS_, uint32_t numThreads_ );


// Delete a task scheduler
void                enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );

// Create a task set.
enkiTaskSet*        enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ );

// Delete a task set.
void                enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );

// Schedule the task
void                enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
                                          void* pArgs_, uint32_t setSize_ );

// Schedule the task with a minimum range.
// This should be set to a value which results in computation effort of at least 10k
// clock cycles to minimize task scheduler overhead.
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
// of m_MinRange.
// Also known as grain size in literature.
void                enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
                                                  void* pArgs_, uint32_t setSize_, uint32_t minRange_ );


// Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
int                 enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );


// Wait for a given task.
// should only be called from thread which created the taskscheduler, or within a task
// if called with 0 it will try to run tasks, and return if none available.
void                enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );


// Waits for all task sets to complete - not guaranteed to work unless we know we
// are in a situation where tasks aren't being continuously added.
void                enkiWaitForAll( enkiTaskScheduler* pETS_ );


// get number of threads
uint32_t            enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );

// TaskScheduler implements several callbacks intended for profilers
typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );
struct enkiProfilerCallbacks
{
    enkiProfilerCallbackFunc threadStart;
    enkiProfilerCallbackFunc threadStop;
    enkiProfilerCallbackFunc waitStart;
    enkiProfilerCallbackFunc waitStop;
};

// Get the callback structure so it can be set
struct enkiProfilerCallbacks*    enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
210
examples/ToyPathTracer/Source/enkiTS/Threads.h
Normal file
@@ -0,0 +1,210 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "Atomics.h"
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
|
||||
#define THREADFUNC_DECL DWORD WINAPI
|
||||
#define THREAD_LOCAL __declspec( thread )
|
||||
|
||||
namespace enki
{
    typedef HANDLE threadid_t;

    // declare the thread start function as:
    // THREADFUNC_DECL MyThreadStart( void* pArg );
    //
    // Creates a thread running StartFunc( pArg ) and stores its handle in
    // *returnid. Returns true if the thread was created.
    inline bool ThreadCreate( threadid_t* returnid, DWORD ( WINAPI *StartFunc) (void* ), void* pArg  )
    {
        // posix equiv pthread_create
        DWORD threadid;
        *returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
        return *returnid != NULL;
    }

    // Closes the thread handle. Returns true on success, matching the POSIX
    // implementation; CloseHandle reports success with a nonzero value (the
    // previous "== 0" comparison returned true on failure).
    inline bool ThreadTerminate( threadid_t threadid )
    {
        // posix equiv pthread_cancel
        return CloseHandle( threadid ) != 0;
    }

    // Returns the processor count reported by GetSystemInfo.
    inline uint32_t GetNumHardwareThreads()
    {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        return sysInfo.dwNumberOfProcessors;
    }
}
|
||||
|
||||
#else // posix
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#define THREADFUNC_DECL void*
|
||||
#define THREAD_LOCAL __thread
|
||||
|
||||
namespace enki
{
    typedef pthread_t threadid_t;

    // declare the thread start function as:
    // THREADFUNC_DECL MyThreadStart( void* pArg );
    //
    // Creates a thread with default attributes running StartFunc( pArg ) and
    // stores its id in *returnid. Returns true if the thread was created.
    inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
    {
        // posix equiv pthread_create
        int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );

        return retval == 0;
    }

    // Requests cancellation of the thread. Returns true if the request was
    // accepted by pthread_cancel.
    inline bool ThreadTerminate( threadid_t threadid )
    {
        // posix equiv pthread_cancel
        return pthread_cancel( threadid ) == 0;
    }

    // Returns the number of processors currently online, and never less than 1:
    // sysconf reports failure as -1, which must not be turned into a huge
    // unsigned thread count.
    inline uint32_t GetNumHardwareThreads()
    {
        const long numOnline = sysconf( _SC_NPROCESSORS_ONLN );
        return ( numOnline > 0 ) ? (uint32_t)numOnline : 1u;
    }
}
|
||||
|
||||
#endif // posix
|
||||
|
||||
|
||||
// Semaphore implementation
|
||||
#ifdef _WIN32
|
||||
|
||||
namespace enki
{
    // Win32 counting semaphore wrapper.
    struct semaphoreid_t
    {
        HANDLE      sem;
    };

    // Creates an unnamed semaphore with an initial count of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
    }

    // Closes the semaphore handle.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        CloseHandle( semaphoreid.sem );
    }

    // Blocks with no timeout until the semaphore can be acquired.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid  )
    {
        DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );

        assert( retval != WAIT_FAILED );
    }

    // Raises the semaphore count by countWaiting; a count of zero is a no-op.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        if( !countWaiting )
        {
            return;
        }
        ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
    }
}
|
||||
#elif defined(__MACH__)
|
||||
|
||||
// OS X does not have POSIX semaphores
|
||||
// see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
|
||||
#include <mach/mach.h>
|
||||
|
||||
namespace enki
{
    // Mach semaphore wrapper.
    struct semaphoreid_t
    {
        semaphore_t   sem;
    };

    // Creates a FIFO-policy semaphore with an initial value of 0 in the current task.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
    }

    // Destroys the semaphore.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        semaphore_destroy( mach_task_self(), semaphoreid.sem );
    }

    // Blocks until the semaphore is signalled.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid  )
    {
        semaphore_wait( semaphoreid.sem );
    }

    // Signals the semaphore countWaiting times; zero or negative counts do nothing.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        for( int32_t signalled = 0; signalled < countWaiting; ++signalled )
        {
            semaphore_signal( semaphoreid.sem );
        }
    }
}
|
||||
|
||||
#else // POSIX
|
||||
|
||||
#include <errno.h>
#include <semaphore.h>
|
||||
|
||||
namespace enki
{
    // POSIX implementation of the semaphore wrapper: holds an unnamed sem_t.
    struct semaphoreid_t
    {
        sem_t sem;
    };

    // Initialises a process-private semaphore with an initial count of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        int err = sem_init( &semaphoreid.sem, 0, 0 );
        assert( err == 0 );
        (void)err; // unused when asserts are compiled out
    }

    // Destroys the semaphore initialised by SemaphoreCreate.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        sem_destroy( &semaphoreid.sem );
    }

    // Blocks the calling thread until the semaphore is signalled.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid )
    {
        // sem_wait fails with EINTR when a signal handler interrupts it; that is not a
        // wake-up, so retry instead of returning as if the semaphore had been posted.
        int err;
        do
        {
            err = sem_wait( &semaphoreid.sem );
        } while( err == -1 && errno == EINTR );
        assert( err == 0 );
        (void)err; // unused when asserts are compiled out
    }

    // Posts the semaphore countWaiting times; non-positive counts are a no-op.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        while( countWaiting-- > 0 )
        {
            sem_post( &semaphoreid.sem );
        }
    }
}
|
||||
#endif
|
||||
|
||||
|
||||
395
examples/ToyPathTracer/Windows/ComputeShader.hlsl
Normal file
@@ -0,0 +1,395 @@
|
||||
#include "../Source/Config.h"
|
||||
|
||||
// Advances a 32-bit xorshift generator: scrambles `state` with three shift/xor steps,
// stores the new value back into `state` and returns it.
inline uint RNG(inout uint state)
{
    uint bits = state;
    bits = bits ^ (bits << 13);
    bits = bits ^ (bits >> 17);
    bits = bits ^ (bits << 15);
    state = bits;
    return bits;
}
|
||||
|
||||
// Returns a pseudo-random float in [0, 1): the low 24 bits of the next RNG value
// divided by 2^24.
float RandomFloat01(inout uint state)
{
    uint low24 = RNG(state) & 0xFFFFFF;
    return low24 / 16777216.0f;
}
|
||||
|
||||
// Returns a random point inside the unit disk in the XY plane (z = 0).
// The first random number picks the angle, the second the (sqrt-distributed) radius.
float3 RandomInUnitDisk(inout uint state)
{
    float angle = RandomFloat01(state) * 2.0f * 3.1415926f;
    float radius = sqrt(RandomFloat01(state));
    float2 p = float2(cos(angle), sin(angle)) * radius;
    return float3(p, 0);
}
|
||||
// Returns a random point inside the unit sphere: a random direction (height z, then
// angle around the Z axis) scaled by the cube root of a third random number.
float3 RandomInUnitSphere(inout uint state)
{
    float z = RandomFloat01(state) * 2.0f - 1.0f;
    float phi = RandomFloat01(state) * 2.0f * 3.1415926f;
    // radius of the circle at height z; clamped so sqrt never sees a negative value
    float ring = sqrt(max(0.0, 1.0f - z * z));
    float3 dir = float3(ring * cos(phi), ring * sin(phi), z);
    float scale = pow(RandomFloat01(state), 1.0 / 3.0);
    return dir * scale;
}
|
||||
// Returns a random unit-length direction: a random height z in [-1, 1) plus a random
// angle around the Z axis.
float3 RandomUnitVector(inout uint state)
{
    float z = RandomFloat01(state) * 2.0f - 1.0f;
    float phi = RandomFloat01(state) * 2.0f * 3.1415926f;
    float ring = sqrt(1.0f - z * z); // radius of the circle at height z
    return float3(ring * cos(phi), ring * sin(phi), z);
}
|
||||
|
||||
|
||||
|
||||
// A ray: origin point plus direction vector.
struct Ray
{
    float3 orig;
    float3 dir;
};

// Builds a Ray from an origin and a direction (stored as given, no normalization).
Ray MakeRay(float3 orig_, float3 dir_)
{
    Ray result;
    result.orig = orig_;
    result.dir = dir_;
    return result;
}

// Returns the point at parameter t along the ray: orig + dir * t.
float3 RayPointAt(Ray r, float t)
{
    return r.orig + r.dir * t;
}
|
||||
|
||||
|
||||
// Computes the refracted direction of v through a surface with normal n, where nint is
// the ratio of refraction indices. Returns false (leaving outRefracted unwritten) when
// the discriminant is not positive, i.e. no refracted ray exists.
inline bool refract(float3 v, float3 n, float nint, out float3 outRefracted)
{
    float cosI = dot(v, n);
    float discr = 1.0f - nint * nint * (1 - cosI * cosI);
    bool canRefract = discr > 0;
    if (canRefract)
    {
        outRefracted = nint * (v - n * cosI) - n * sqrt(discr);
    }
    return canRefract;
}
|
||||
// Schlick's approximation of reflectance for a given cosine and refraction index ri.
inline float schlick(float cosine, float ri)
{
    float f = (1 - ri) / (1 + ri);
    float r0 = f * f;
    // saturate guards against tiny negative values of (1 - cosine)
    float grazing = saturate(1 - cosine);
    return r0 + (1 - r0) * pow(grazing, 5);
}
|
||||
|
||||
// Result of a ray/sphere intersection, filled in by HitSpheres.
struct Hit
{
    float3 pos;     // intersection point
    float3 normal;  // surface normal at pos
    float t;        // ray parameter of the intersection
};
|
||||
|
||||
// Sphere primitive as read from the g_Spheres structured buffer.
struct Sphere
{
    float3 center;
    float radius;
    float invRadius; // scales (pos - center) into the hit normal; presumably 1/radius - confirm on the CPU side
};
|
||||
|
||||
// Values of Material::type, dispatched on in Scatter.
#define MatLambert 0
#define MatMetal 1
#define MatDielectric 2

// Surface material as read from the g_Materials structured buffer.
struct Material
{
    int type;         // one of the Mat* values above
    float3 albedo;    // attenuation applied by Lambert and metal scattering
    float3 emissive;  // light emitted by the surface
    float roughness;  // metal only: scales the random perturbation of the reflection
    float ri;         // dielectric only: refraction index
};
|
||||
|
||||
// Per-thread-group copies of the scene data; main() fills them from the structured
// buffers before any tracing happens.
groupshared Sphere s_GroupSpheres[kCSMaxObjects];
groupshared Material s_GroupMaterials[kCSMaxObjects];
groupshared int s_GroupEmissives[kCSMaxObjects]; // indices of the emissive spheres
|
||||
|
||||
|
||||
// Camera description used by CameraGetRay to generate primary rays.
struct Camera
{
    float3 origin;
    float3 lowerLeftCorner;  // image-plane point for (s, t) = (0, 0)
    float3 horizontal;       // image-plane extent along s
    float3 vertical;         // image-plane extent along t
    float3 u;                // lens offsets are applied along u and v
    float3 v;
    float3 w;
    float lensRadius;        // scales the random lens sample
};
|
||||
|
||||
// Generates a primary ray through image-plane coordinates (s, t), starting from a
// random point on the lens so that the image gets depth of field.
Ray CameraGetRay(Camera cam, float s, float t, inout uint state)
{
    float3 lensSample = cam.lensRadius * RandomInUnitDisk(state);
    float3 lensOffset = cam.u * lensSample.x + cam.v * lensSample.y;
    float3 target = cam.lowerLeftCorner + s * cam.horizontal + t * cam.vertical;
    float3 start = cam.origin + lensOffset;
    return MakeRay(start, normalize(target - cam.origin - lensOffset));
}
|
||||
|
||||
|
||||
// Intersects ray r with the first sphereCount group-shared spheres and returns the
// index of the closest hit with t in (tMin, tMax), or -1 when nothing is hit.
// outHit is written only when a sphere is hit.
int HitSpheres(Ray r, int sphereCount, float tMin, float tMax, inout Hit outHit)
{
    float closestT = tMax;
    int closestID = -1;

    for (int i = 0; i < sphereCount; ++i)
    {
        Sphere sph = s_GroupSpheres[i];
        float3 toCenter = sph.center - r.orig;
        float nb = dot(toCenter, r.dir);
        float c = dot(toCenter, toCenter) - sph.radius * sph.radius;
        float discr = nb * nb - c;
        if (discr > 0)
        {
            float root = sqrt(discr);

            // prefer the nearer intersection; fall back to the farther one when the
            // nearer lies at or before tMin
            float t = nb - root;
            if (t <= tMin)
                t = nb + root;

            if (t > tMin && t < closestT)
            {
                closestID = i;
                closestT = t;
            }
        }
    }

    if (closestID != -1)
    {
        outHit.pos = RayPointAt(r, closestT);
        outHit.normal = (outHit.pos - s_GroupSpheres[closestID].center) * s_GroupSpheres[closestID].invRadius;
        outHit.t = closestT;
    }
    return closestID;
}
|
||||
|
||||
// Per-frame parameters; main() reads element 0 of the g_Params structured buffer.
struct Params
{
    Camera cam;
    int sphereCount;    // number of valid entries in g_Spheres / g_Materials
    int screenWidth;
    int screenHeight;
    int frames;         // mixed into the per-pixel RNG seed
    float invWidth;     // scales pixel x into the camera's s coordinate
    float invHeight;    // scales pixel y into the camera's t coordinate
    float lerpFac;      // blend factor between the new colour and the previous image
    int emissiveCount;  // number of valid entries in g_Emissives
};
|
||||
|
||||
|
||||
// Valid ray-parameter interval passed to HitWorld, and the bounce limit used by Trace.
#define kMinT 0.001f
#define kMaxT 1.0e7f
#define kMaxDepth 10
|
||||
|
||||
|
||||
// Intersects a ray with the whole scene; the scene consists of spheres only, so this
// simply forwards to HitSpheres. Returns the hit object index or -1.
static int HitWorld(int sphereCount, Ray r, float tMin, float tMax, inout Hit outHit)
{
    int hitID = HitSpheres(r, sphereCount, tMin, tMax, outHit);
    return hitID;
}
|
||||
|
||||
|
||||
// Computes how ray r_in scatters at hit rec on object matID.
//   attenuation - colour multiplier for the scattered ray
//   scattered   - the outgoing ray
//   outLightE   - direct light gathered by explicit light sampling (Lambert only)
//   inoutRayCount is incremented for every shadow ray that is traced.
// Returns false when the path ends here (metal scattering below the surface, or an
// unknown material type).
static bool Scatter(int sphereCount, int emissiveCount, int matID, Ray r_in, Hit rec, out float3 attenuation, out Ray scattered, out float3 outLightE, inout int inoutRayCount, inout uint state)
{
    outLightE = float3(0, 0, 0);
    Material mat = s_GroupMaterials[matID];

    if (mat.type == MatLambert)
    {
        // random point on the unit sphere that is tangent to the hit point
        float3 target = rec.pos + rec.normal + RandomUnitVector(state);
        scattered = MakeRay(rec.pos, normalize(target - rec.pos));
        attenuation = mat.albedo;

        // explicit light sampling: one shadow ray per emissive sphere
#if DO_LIGHT_SAMPLING
        for (int j = 0; j < emissiveCount; ++j)
        {
            int lightID = s_GroupEmissives[j];
            if (matID == lightID)
                continue; // skip self
            Material lightMat = s_GroupMaterials[lightID];
            Sphere light = s_GroupSpheres[lightID];

            // coordinate system for sampling, with sw pointing at the light: sw, su, sv
            float3 sw = normalize(light.center - rec.pos);
            float3 su = normalize(cross(abs(sw.x)>0.01f ? float3(0, 1, 0) : float3(1, 0, 0), sw));
            float3 sv = cross(sw, su);

            // sample the sphere by solid angle
            float cosAMax = sqrt(1.0f - light.radius*light.radius / dot(rec.pos - light.center, rec.pos - light.center));
            float eps1 = RandomFloat01(state);
            float eps2 = RandomFloat01(state);
            float cosA = 1.0f - eps1 + eps1 * cosAMax;
            float sinA = sqrt(1.0f - cosA * cosA);
            float phi = 2 * 3.1415926 * eps2;
            float3 l = su * cos(phi) * sinA + sv * sin(phi) * sinA + sw * cosA;

            // shoot shadow ray
            Hit lightHit;
            ++inoutRayCount;
            int hitID = HitWorld(sphereCount, MakeRay(rec.pos, l), kMinT, kMaxT, lightHit);
            if (hitID == lightID)
            {
                float omega = 2 * 3.1415926 * (1 - cosAMax);

                // normal flipped to face the incoming ray
                float3 rdir = r_in.dir;
                float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
                outLightE += (mat.albedo * lightMat.emissive) * (max(0.0f, dot(l, nl)) * omega / 3.1415926);
            }
        }
#endif
        return true;
    }

    if (mat.type == MatMetal)
    {
        float3 refl = reflect(r_in.dir, rec.normal);
        // reflected ray, perturbed by a random point inside a sphere scaled by roughness
        float roughness = mat.roughness;
#if DO_MITSUBA_COMPARE
        roughness = 0; // until we get better BRDF for metals
#endif
        scattered = MakeRay(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
        attenuation = mat.albedo;
        return dot(scattered.dir, rec.normal) > 0;
    }

    if (mat.type == MatDielectric)
    {
        float3 outwardN;
        float3 rdir = r_in.dir;
        float3 refl = reflect(rdir, rec.normal);
        float nint;
        attenuation = float3(1, 1, 1);
        float3 refr;
        float reflProb;
        float cosine;

        // pick the normal orientation and index ratio depending on which side of the
        // surface the ray arrives from
        if (dot(rdir, rec.normal) > 0)
        {
            outwardN = -rec.normal;
            nint = mat.ri;
            cosine = mat.ri * dot(rdir, rec.normal);
        }
        else
        {
            outwardN = rec.normal;
            nint = 1.0f / mat.ri;
            cosine = -dot(rdir, rec.normal);
        }

        // no refracted ray -> always reflect
        if (refract(rdir, outwardN, nint, refr))
        {
            reflProb = schlick(cosine, mat.ri);
        }
        else
        {
            reflProb = 1;
        }

        if (RandomFloat01(state) < reflProb)
            scattered = MakeRay(rec.pos, normalize(refl));
        else
            scattered = MakeRay(rec.pos, normalize(refr));
        return true;
    }

    // unknown material type: magenta attenuation, dummy ray, path ends
    attenuation = float3(1, 0, 1);
    scattered = MakeRay(float3(0,0,0), float3(0, 0, 1));
    return false;
}
|
||||
|
||||
// Traces ray r through the scene for up to kMaxDepth bounces and returns the gathered
// colour. inoutRayCount is incremented for every ray traced.
static float3 Trace(int sphereCount, int emissiveCount, Ray r, inout int inoutRayCount, inout uint state)
{
    float3 col = 0;
    float3 curAtten = 1;
    bool doMaterialE = true;

    // GPUs don't support recursion, so bounces are iterated in a loop up to max depth
    for (int depth = 0; depth < kMaxDepth; ++depth)
    {
        Hit rec;
        ++inoutRayCount;
        int id = HitWorld(sphereCount, r, kMinT, kMaxT, rec);

        if (id < 0)
        {
            // nothing hit: add the sky contribution and stop
#if DO_MITSUBA_COMPARE
            col += curAtten * float3(0.15f, 0.21f, 0.3f); // easier compare with Mitsuba's constant environment light
#else
            float3 unitDir = r.dir;
            float t = 0.5f*(unitDir.y + 1.0f);
            float3 skyCol = ((1.0f - t)*float3(1.0f, 1.0f, 1.0f) + t * float3(0.5f, 0.7f, 1.0f)) * 0.3f;
            col += curAtten * skyCol;
#endif
            break;
        }

        Ray scattered;
        float3 attenuation;
        float3 lightE;
        Material mat = s_GroupMaterials[id];
        float3 matE = mat.emissive;

        bool didScatter = Scatter(sphereCount, emissiveCount, id, r, rec, attenuation, scattered, lightE, inoutRayCount, state);
        if (!didScatter)
        {
            // path ends at this surface: only its own emission contributes
            col += curAtten * matE;
            break;
        }

#if DO_LIGHT_SAMPLING
        // after a Lambert bounce the lights were sampled explicitly, so the emission of
        // the next surface hit is skipped
        if (!doMaterialE) matE = 0;
        doMaterialE = (mat.type != MatLambert);
#endif
        col += curAtten * (matE + lightE);
        curAtten *= attenuation;
        r = scattered;
    }
    return col;
}
|
||||
|
||||
// Resources bound to the compute shader.
Texture2D srcImage : register(t0);                  // previous image, blended with the new result
RWTexture2D<float4> dstImage : register(u0);        // output image
StructuredBuffer<Sphere> g_Spheres : register(t1);
StructuredBuffer<Material> g_Materials : register(t2);
StructuredBuffer<Params> g_Params : register(t3);   // only element 0 is read
StructuredBuffer<int> g_Emissives : register(t4);   // indices of emissive spheres
RWByteAddressBuffer g_OutRayCount : register(u1);   // ray counter accumulated at offset 0
|
||||
|
||||
// Compute shader entry point: one thread per pixel. Copies the scene into group-shared
// memory, traces DO_SAMPLES_PER_PIXEL paths for the pixel, blends the result with the
// previous image and adds the number of traced rays to g_OutRayCount.
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
    // First, move scene data (spheres, materials, emissive indices) into group-shared
    // memory. Done in parallel: each thread in the group copies its own chunk.
    uint threadID = tid.y * kCSGroupSizeX + tid.x;
    uint groupSize = kCSGroupSizeX * kCSGroupSizeY;
    uint objCount = g_Params[0].sphereCount;
    uint perThread = (objCount + groupSize - 1) / groupSize; // chunk size, rounded up
    uint copyBegin = threadID * perThread;
    uint copyEnd = copyBegin + perThread;
    for (uint io = copyBegin; io < copyEnd; ++io)
    {
        if (io < objCount)
        {
            s_GroupSpheres[io] = g_Spheres[io];
            s_GroupMaterials[io] = g_Materials[io];
        }
        if (io < g_Params[0].emissiveCount)
        {
            s_GroupEmissives[io] = g_Emissives[io];
        }
    }
    // every thread must see the complete copy before tracing starts
    GroupMemoryBarrierWithGroupSync();

    int rayCount = 0;
    float3 col = 0;
    Params params = g_Params[0];
    // per-pixel, per-frame seed; "| 1" keeps it non-zero
    uint rngState = (gid.x * 1973 + gid.y * 9277 + params.frames * 26699) | 1;
    for (int sampleIdx = 0; sampleIdx < DO_SAMPLES_PER_PIXEL; sampleIdx++)
    {
        // jitter the sample position inside the pixel
        float u = float(gid.x + RandomFloat01(rngState)) * params.invWidth;
        float v = float(gid.y + RandomFloat01(rngState)) * params.invHeight;
        Ray r = CameraGetRay(params.cam, u, v, rngState);
        col += Trace(params.sphereCount, params.emissiveCount, r, rayCount, rngState);
    }
    col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);

    // blend with the previous image
    float3 prev = srcImage.Load(int3(gid.xy,0)).rgb;
    col = lerp(col, prev, params.lerpFac);
    dstImage[gid.xy] = float4(col, 1);

    g_OutRayCount.InterlockedAdd(0, rayCount);
}
|
||||
15
examples/ToyPathTracer/Windows/PixelShader.hlsl
Normal file
@@ -0,0 +1,15 @@
|
||||
// Approximate linear -> sRGB conversion: clamps negatives to zero, then applies
// 1.055 * x^(1/2.4) - 0.055, clamped at zero.
float3 LinearToSRGB(float3 rgb)
{
    float3 clamped = max(rgb, float3(0, 0, 0));
    float3 encoded = 1.055 * pow(clamped, 0.416666667) - 0.055;
    return max(encoded, 0.0);
}
|
||||
|
||||
// Image sampled by the pixel shader, and the sampler used to read it.
Texture2D tex : register(t0);
SamplerState smp : register(s0);
|
||||
|
||||
// Pixel shader entry point: samples the image at uv, converts it from linear to sRGB
// and outputs it with alpha 1.
float4 main(float2 uv : TEXCOORD0) : SV_Target
{
    float3 linearCol = tex.Sample(smp, uv).rgb;
    float3 srgbCol = LinearToSRGB(linearCol);
    return float4(srgbCol, 1.0f);
}
|
||||
31
examples/ToyPathTracer/Windows/TestCpu.sln
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.27130.2036
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestCpu", "TestCpu.vcxproj", "{4F84B756-87F5-4B92-827B-DA087DAE1900}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.Build.0 = Debug|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.Build.0 = Debug|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.ActiveCfg = Release|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.Build.0 = Release|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.ActiveCfg = Release|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {067FB780-37B8-465E-AD7E-E7B238B9C04F}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
243
examples/ToyPathTracer/Windows/TestCpu.vcxproj
Normal file
@@ -0,0 +1,243 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{4F84B756-87F5-4B92-827B-DA087DAE1900}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>TestCpu</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>TRACY_ENABLE;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<ExceptionHandling>false</ExceptionHandling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>TRACY_ENABLE;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<ExceptionHandling>false</ExceptionHandling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\TracyClient.cpp" />
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp" />
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp" />
|
||||
<ClCompile Include="..\Source\Maths.cpp" />
|
||||
<ClCompile Include="..\Source\Test.cpp" />
|
||||
<ClCompile Include="TestWin.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Source\Config.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\Atomics.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\Threads.h" />
|
||||
<ClInclude Include="..\Source\Maths.h" />
|
||||
<ClInclude Include="..\Source\MathSimd.h" />
|
||||
<ClInclude Include="..\Source\Test.h" />
|
||||
<ClInclude Include="..\Source\stb_image.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\.editorconfig" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="ComputeShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
</FxCompile>
|
||||
<FxCompile Include="PixelShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_PSBytecode</VariableName>
|
||||
</FxCompile>
|
||||
<FxCompile Include="VertexShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Vertex</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_VSBytecode</VariableName>
|
||||
</FxCompile>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
67
examples/ToyPathTracer/Windows/TestCpu.vcxproj.filters
Normal file
@@ -0,0 +1,67 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClCompile Include="TestWin.cpp" />
|
||||
<ClCompile Include="..\Source\Test.cpp">
|
||||
<Filter>Source</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\Maths.cpp">
|
||||
<Filter>Source</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\TracyClient.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="Source">
|
||||
<UniqueIdentifier>{5f19f217-c1c7-4eeb-be61-8b986fee9375}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source\enkiTS">
|
||||
<UniqueIdentifier>{38c448a8-1dcc-4116-9410-a9f8d068caff}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Source\Test.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\stb_image.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\Atomics.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\Threads.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\Maths.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\Config.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\MathSimd.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\.editorconfig" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="VertexShader.hlsl" />
|
||||
<FxCompile Include="PixelShader.hlsl" />
|
||||
<FxCompile Include="ComputeShader.hlsl" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||