src/cpu/ppc/vm/vm_version_ppc.cpp

changeset 6495
67fa91961822
parent 6490
41b780b43b74
child 6511
31e80afe3fed
     1.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Tue Dec 10 14:29:43 2013 +0100
     1.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Dec 11 00:06:11 2013 +0100
     1.3 @@ -89,16 +89,17 @@
     1.4    }
     1.5  
     1.6    // On Power6 test for section size.
     1.7 -  if (PowerArchitecturePPC64 == 6)
     1.8 +  if (PowerArchitecturePPC64 == 6) {
     1.9      determine_section_size();
    1.10 -  // TODO: PPC port else
    1.11 +  // TODO: PPC port } else {
    1.12    // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
    1.13 +  }
    1.14  
    1.15    MaxVectorSize = 8;
    1.16  #endif
    1.17  
    1.18    // Create and print feature-string.
    1.19 -  char buf[(num_features+1) * 16]; // max 16 chars per feature
    1.20 +  char buf[(num_features+1) * 16]; // Max 16 chars per feature.
    1.21    jio_snprintf(buf, sizeof(buf),
    1.22                 "ppc64%s%s%s%s%s%s%s%s",
    1.23                 (has_fsqrt()   ? " fsqrt"   : ""),
    1.24 @@ -127,21 +128,21 @@
    1.25    if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
    1.26  
    1.27    if (AllocatePrefetchStyle == 4) {
    1.28 -    AllocatePrefetchStepSize = cache_line_size; // need exact value
    1.29 -    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default
    1.30 -    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ?
    1.31 +    AllocatePrefetchStepSize = cache_line_size; // Need exact value.
    1.32 +    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
    1.33 +    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
    1.34    } else {
    1.35      if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
    1.36 -    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value
    1.37 -    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ?
    1.38 +    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
    1.39 +    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
    1.40    }
    1.41  
    1.42    assert(AllocatePrefetchLines > 0, "invalid value");
    1.43    if (AllocatePrefetchLines < 1) // Set valid value in product VM.
    1.44 -    AllocatePrefetchLines = 1; // Conservative value
    1.45 +    AllocatePrefetchLines = 1; // Conservative value.
    1.46  
    1.47    if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size)
    1.48 -    AllocatePrefetchStyle = 1; // fall back if inappropriate
    1.49 +    AllocatePrefetchStyle = 1; // Fall back if inappropriate.
    1.50  
    1.51    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
    1.52  }
    1.53 @@ -160,13 +161,13 @@
    1.54  
    1.55    const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;
    1.56  
    1.57 -  // Allocate space for the code
    1.58 +  // Allocate space for the code.
    1.59    ResourceMark rm;
    1.60    CodeBuffer cb("detect_section_size", code_size, 0);
    1.61    MacroAssembler* a = new MacroAssembler(&cb);
    1.62  
    1.63    uint32_t *code = (uint32_t *)a->pc();
    1.64 -  // emit code.
    1.65 +  // Emit code.
    1.66    void (*test1)() = (void(*)())(void *)a->emit_fd();
    1.67  
    1.68    Label l1;
    1.69 @@ -189,58 +190,58 @@
    1.70  
    1.71      // ;;  1
    1.72      a->nop();                   // 5
    1.73 -    a->fmr(F6, F6);     // 6
    1.74 -    a->fmr(F7, F7);     // 7
    1.75 +    a->fmr(F6, F6);             // 6
    1.76 +    a->fmr(F7, F7);             // 7
    1.77      a->endgroup();              // 8
    1.78      // ------- sector 8 ------------
    1.79  
    1.80      // ;;  2
    1.81      a->nop();                   // 9
    1.82      a->nop();                   // 10
    1.83 -    a->fmr(F8, F8);     // 11
    1.84 -    a->fmr(F9, F9);     // 12
    1.85 +    a->fmr(F8, F8);             // 11
    1.86 +    a->fmr(F9, F9);             // 12
    1.87  
    1.88      // ;;  3
    1.89      a->nop();                   // 13
    1.90 -    a->fmr(F10, F10);   // 14
    1.91 -    a->fmr(F11, F11);   // 15
    1.92 +    a->fmr(F10, F10);           // 14
    1.93 +    a->fmr(F11, F11);           // 15
    1.94      a->endgroup();              // 16
    1.95      // -------- sector 16 -------------
    1.96  
    1.97      // ;;  4
    1.98      a->nop();                   // 17
    1.99      a->nop();                   // 18
   1.100 -    a->fmr(F15, F15);   // 19
   1.101 -    a->fmr(F16, F16);   // 20
   1.102 +    a->fmr(F15, F15);           // 19
   1.103 +    a->fmr(F16, F16);           // 20
   1.104  
   1.105      // ;;  5
   1.106      a->nop();                   // 21
   1.107 -    a->fmr(F17, F17);   // 22
   1.108 -    a->fmr(F18, F18);   // 23
   1.109 +    a->fmr(F17, F17);           // 22
   1.110 +    a->fmr(F18, F18);           // 23
   1.111      a->endgroup();              // 24
   1.112      // ------- sector 24  ------------
   1.113  
   1.114      // ;;  6
   1.115      a->nop();                   // 25
   1.116      a->nop();                   // 26
   1.117 -    a->fmr(F19, F19);     // 27
   1.118 -    a->fmr(F20, F20);     // 28
   1.119 +    a->fmr(F19, F19);           // 27
   1.120 +    a->fmr(F20, F20);           // 28
   1.121  
   1.122      // ;;  7
   1.123      a->nop();                   // 29
   1.124 -    a->fmr(F21, F21);   // 30
   1.125 -    a->fmr(F22, F22);   // 31
   1.126 +    a->fmr(F21, F21);           // 30
   1.127 +    a->fmr(F22, F22);           // 31
   1.128      a->brnop0();                // 32
   1.129  
   1.130      // ------- sector 32 ------------
   1.131    }
   1.132  
   1.133    // ;; 8
   1.134 -  a->cmpdi(CCR0, R4, unroll);// 33
   1.135 -  a->bge(CCR0, l1);         // 34
   1.136 +  a->cmpdi(CCR0, R4, unroll);   // 33
   1.137 +  a->bge(CCR0, l1);             // 34
   1.138    a->blr();
   1.139  
   1.140 -  // emit code.
   1.141 +  // Emit code.
   1.142    void (*test2)() = (void(*)())(void *)a->emit_fd();
   1.143    // uint32_t *code = (uint32_t *)a->pc();
   1.144  
   1.145 @@ -382,39 +383,40 @@
   1.146  #endif // COMPILER2
   1.147  
   1.148  void VM_Version::determine_features() {
   1.149 -  const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction)
   1.150 +  // 7 InstWords for each call (function descriptor + blr instruction).
   1.151 +  const int code_size = (num_features+1+2*7)*BytesPerInstWord;
   1.152    int features = 0;
   1.153  
   1.154    // create test area
   1.155 -  enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size)
   1.156 +  enum { BUFFER_SIZE = 2*4*K }; // Needs to be >= 2 * max cache line size (cache line size can't exceed min page size).
   1.157    char test_area[BUFFER_SIZE];
   1.158    char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
   1.159  
   1.160 -  // Allocate space for the code
   1.161 +  // Allocate space for the code.
   1.162    ResourceMark rm;
   1.163    CodeBuffer cb("detect_cpu_features", code_size, 0);
   1.164    MacroAssembler* a = new MacroAssembler(&cb);
   1.165  
   1.166 -  // emit code.
   1.167 +  // Emit code.
   1.168    void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd();
   1.169    uint32_t *code = (uint32_t *)a->pc();
   1.170    // Don't use R0 in ldarx.
   1.171 -  // keep R3_ARG1 = R3 unmodified, it contains &field (see below)
   1.172 -  // keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below)
   1.173 -  a->fsqrt(F3, F4);                         // code[0] -> fsqrt_m
   1.174 -  a->isel(R7, R5, R6, 0);                   // code[1] -> isel_m
   1.175 -  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m
   1.176 -  a->cmpb(R7, R5, R6);                      // code[3] -> bcmp
   1.177 -  //a->mftgpr(R7, F3);                      // code[4] -> mftgpr
   1.178 -  a->popcntb(R7, R5);                       // code[5] -> popcntb
   1.179 -  a->popcntw(R7, R5);                       // code[6] -> popcntw
   1.180 -  a->fcfids(F3, F4);                        // code[7] -> fcfids
   1.181 -  a->vand(VR0, VR0, VR0);                   // code[8] -> vand
   1.182 +  // Keep R3_ARG1 unmodified, it contains &field (see below).
   1.183 +  // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
   1.184 +  a->fsqrt(F3, F4);                            // code[0] -> fsqrt_m
   1.185 +  a->isel(R7, R5, R6, 0);                      // code[1] -> isel_m
   1.186 +  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m
   1.187 +  a->cmpb(R7, R5, R6);                         // code[3] -> cmpb
   1.188 +  //a->mftgpr(R7, F3);                         // code[4] -> mftgpr
   1.189 +  a->popcntb(R7, R5);                          // code[5] -> popcntb
   1.190 +  a->popcntw(R7, R5);                          // code[6] -> popcntw
   1.191 +  a->fcfids(F3, F4);                           // code[7] -> fcfids
   1.192 +  a->vand(VR0, VR0, VR0);                      // code[8] -> vand
   1.193    a->blr();
   1.194  
   1.195 -  // Emit function to set one cache line to zero
   1.196 -  void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it
   1.197 -  a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr
   1.198 +  // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
   1.199 +  void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd();
   1.200 +  a->dcbz(R3_ARG1); // R3_ARG1 = addr
   1.201    a->blr();
   1.202  
   1.203    uint32_t *code_end = (uint32_t *)a->pc();
   1.204 @@ -428,8 +430,8 @@
   1.205    }
   1.206  
   1.207    // Measure cache line size.
   1.208 -  memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF
   1.209 -  (*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle
   1.210 +  memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
   1.211 +  (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
   1.212    int count = 0; // count zeroed bytes
   1.213    for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
   1.214    guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");

mercurial