1.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp Tue Dec 10 14:29:43 2013 +0100 1.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Dec 11 00:06:11 2013 +0100 1.3 @@ -89,16 +89,17 @@ 1.4 } 1.5 1.6 // On Power6 test for section size. 1.7 - if (PowerArchitecturePPC64 == 6) 1.8 + if (PowerArchitecturePPC64 == 6) { 1.9 determine_section_size(); 1.10 - // TODO: PPC port else 1.11 + // TODO: PPC port } else { 1.12 // TODO: PPC port PdScheduling::power6SectorSize = 0x20; 1.13 + } 1.14 1.15 MaxVectorSize = 8; 1.16 #endif 1.17 1.18 // Create and print feature-string. 1.19 - char buf[(num_features+1) * 16]; // max 16 chars per feature 1.20 + char buf[(num_features+1) * 16]; // Max 16 chars per feature. 1.21 jio_snprintf(buf, sizeof(buf), 1.22 "ppc64%s%s%s%s%s%s%s%s", 1.23 (has_fsqrt() ? " fsqrt" : ""), 1.24 @@ -127,21 +128,21 @@ 1.25 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; 1.26 1.27 if (AllocatePrefetchStyle == 4) { 1.28 - AllocatePrefetchStepSize = cache_line_size; // need exact value 1.29 - if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default 1.30 - if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ? 1.31 + AllocatePrefetchStepSize = cache_line_size; // Need exact value. 1.32 + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default. 1.33 + if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined? 1.34 } else { 1.35 if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; 1.36 - if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value 1.37 - if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ? 1.38 + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value. 1.39 + if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined? 1.40 } 1.41 1.42 assert(AllocatePrefetchLines > 0, "invalid value"); 1.43 if (AllocatePrefetchLines < 1) // Set valid value in product VM. 1.44 - AllocatePrefetchLines = 1; // Conservative value 1.45 + AllocatePrefetchLines = 1; // Conservative value. 1.46 1.47 if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) 1.48 - AllocatePrefetchStyle = 1; // fall back if inappropriate 1.49 + AllocatePrefetchStyle = 1; // Fall back if inappropriate. 1.50 1.51 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 1.52 } 1.53 @@ -160,13 +161,13 @@ 1.54 1.55 const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; 1.56 1.57 - // Allocate space for the code 1.58 + // Allocate space for the code. 1.59 ResourceMark rm; 1.60 CodeBuffer cb("detect_section_size", code_size, 0); 1.61 MacroAssembler* a = new MacroAssembler(&cb); 1.62 1.63 uint32_t *code = (uint32_t *)a->pc(); 1.64 - // emit code. 1.65 + // Emit code. 1.66 void (*test1)() = (void(*)())(void *)a->emit_fd(); 1.67 1.68 Label l1; 1.69 @@ -189,58 +190,58 @@ 1.70 1.71 // ;; 1 1.72 a->nop(); // 5 1.73 - a->fmr(F6, F6); // 6 1.74 - a->fmr(F7, F7); // 7 1.75 + a->fmr(F6, F6); // 6 1.76 + a->fmr(F7, F7); // 7 1.77 a->endgroup(); // 8 1.78 // ------- sector 8 ------------ 1.79 1.80 // ;; 2 1.81 a->nop(); // 9 1.82 a->nop(); // 10 1.83 - a->fmr(F8, F8); // 11 1.84 - a->fmr(F9, F9); // 12 1.85 + a->fmr(F8, F8); // 11 1.86 + a->fmr(F9, F9); // 12 1.87 1.88 // ;; 3 1.89 a->nop(); // 13 1.90 - a->fmr(F10, F10); // 14 1.91 - a->fmr(F11, F11); // 15 1.92 + a->fmr(F10, F10); // 14 1.93 + a->fmr(F11, F11); // 15 1.94 a->endgroup(); // 16 1.95 // -------- sector 16 ------------- 1.96 1.97 // ;; 4 1.98 a->nop(); // 17 1.99 a->nop(); // 18 1.100 - a->fmr(F15, F15); // 19 1.101 - a->fmr(F16, F16); // 20 1.102 + a->fmr(F15, F15); // 19 1.103 + a->fmr(F16, F16); // 20 1.104 1.105 // ;; 5 1.106 a->nop(); // 21 1.107 - a->fmr(F17, F17); // 22 1.108 - a->fmr(F18, F18); // 23 1.109 + a->fmr(F17, F17); // 22 1.110 + a->fmr(F18, F18); // 23 1.111 a->endgroup(); // 24 1.112 // ------- sector 24 ------------ 1.113 1.114 // ;; 6 1.115 a->nop(); // 25 1.116 a->nop(); // 26 1.117 - a->fmr(F19, F19); // 27 1.118 - a->fmr(F20, F20); // 28 1.119 + a->fmr(F19, F19); // 27 1.120 + a->fmr(F20, F20); // 28 1.121 1.122 // ;; 7 1.123 a->nop(); // 29 1.124 - a->fmr(F21, F21); // 30 1.125 - a->fmr(F22, F22); // 31 1.126 + a->fmr(F21, F21); // 30 1.127 + a->fmr(F22, F22); // 31 1.128 a->brnop0(); // 32 1.129 1.130 // ------- sector 32 ------------ 1.131 } 1.132 1.133 // ;; 8 1.134 - a->cmpdi(CCR0, R4, unroll);// 33 1.135 - a->bge(CCR0, l1); // 34 1.136 + a->cmpdi(CCR0, R4, unroll); // 33 1.137 + a->bge(CCR0, l1); // 34 1.138 a->blr(); 1.139 1.140 - // emit code. 1.141 + // Emit code. 1.142 void (*test2)() = (void(*)())(void *)a->emit_fd(); 1.143 // uint32_t *code = (uint32_t *)a->pc(); 1.144 1.145 @@ -382,39 +383,40 @@ 1.146 #endif // COMPILER2 1.147 1.148 void VM_Version::determine_features() { 1.149 - const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction) 1.150 + // 7 InstWords for each call (function descriptor + blr instruction). 1.151 + const int code_size = (num_features+1+2*7)*BytesPerInstWord; 1.152 int features = 0; 1.153 1.154 // create test area 1.155 - enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size) 1.156 + enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size). 1.157 char test_area[BUFFER_SIZE]; 1.158 char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; 1.159 1.160 - // Allocate space for the code 1.161 + // Allocate space for the code. 1.162 ResourceMark rm; 1.163 CodeBuffer cb("detect_cpu_features", code_size, 0); 1.164 MacroAssembler* a = new MacroAssembler(&cb); 1.165 1.166 - // emit code. 1.167 + // Emit code. 1.168 void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd(); 1.169 uint32_t *code = (uint32_t *)a->pc(); 1.170 // Don't use R0 in ldarx. 1.171 - // keep R3_ARG1 = R3 unmodified, it contains &field (see below) 1.172 - // keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below) 1.173 - a->fsqrt(F3, F4); // code[0] -> fsqrt_m 1.174 - a->isel(R7, R5, R6, 0); // code[1] -> isel_m 1.175 - a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m 1.176 - a->cmpb(R7, R5, R6); // code[3] -> bcmp 1.177 - //a->mftgpr(R7, F3); // code[4] -> mftgpr 1.178 - a->popcntb(R7, R5); // code[5] -> popcntb 1.179 - a->popcntw(R7, R5); // code[6] -> popcntw 1.180 - a->fcfids(F3, F4); // code[7] -> fcfids 1.181 - a->vand(VR0, VR0, VR0); // code[8] -> vand 1.182 + // Keep R3_ARG1 unmodified, it contains &field (see below). 1.183 + // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). 1.184 + a->fsqrt(F3, F4); // code[0] -> fsqrt_m 1.185 + a->isel(R7, R5, R6, 0); // code[1] -> isel_m 1.186 + a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m 1.187 + a->cmpb(R7, R5, R6); // code[3] -> bcmp 1.188 + //a->mftgpr(R7, F3); // code[4] -> mftgpr 1.189 + a->popcntb(R7, R5); // code[5] -> popcntb 1.190 + a->popcntw(R7, R5); // code[6] -> popcntw 1.191 + a->fcfids(F3, F4); // code[7] -> fcfids 1.192 + a->vand(VR0, VR0, VR0); // code[8] -> vand 1.193 a->blr(); 1.194 1.195 - // Emit function to set one cache line to zero 1.196 - void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it 1.197 - a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr 1.198 + // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. 1.199 + void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); 1.200 + a->dcbz(R3_ARG1); // R3_ARG1 = addr 1.201 a->blr(); 1.202 1.203 uint32_t *code_end = (uint32_t *)a->pc(); 1.204 @@ -428,8 +430,8 @@ 1.205 } 1.206 1.207 // Measure cache line size. 1.208 - memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF 1.209 - (*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle 1.210 + memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF. 1.211 + (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle. 1.212 int count = 0; // count zeroed bytes 1.213 for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; 1.214 guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");