src/cpu/mips/vm/stubGenerator_mips_64.cpp

changeset 8
cf5765c81f87
parent 7
e26ad49b7194
child 13
bc227c49eaae
equal deleted inserted replaced
7:e26ad49b7194 8:cf5765c81f87
548 __ delayed()->nop(); 548 __ delayed()->nop();
549 __ dsub(AT, A1, T9); 549 __ dsub(AT, A1, T9);
550 __ bgtz(AT, no_overlap_target); 550 __ bgtz(AT, no_overlap_target);
551 __ delayed()->nop(); 551 __ delayed()->nop();
552 552
553 // 2016/05/10 aoqi: If A0 = 0xf... (negative) and A1 = 0x0... (positive), then go to no_overlap_target
554 Label L;
555 __ bgez(A0, L);
556 __ delayed()->nop();
557 __ bgtz(A1, no_overlap_target);
558 __ delayed()->nop();
559 __ bind(L);
560
553 } 561 }
554 562
555 // 563 //
556 // Generate store check for array 564 // Generate store check for array
557 // 565 //
636 __ move(T8, T1); // original count in T1 644 __ move(T8, T1); // original count in T1
637 __ daddi(AT, T1, -3); 645 __ daddi(AT, T1, -3);
638 __ blez(AT, l_4); 646 __ blez(AT, l_4);
639 __ delayed()->nop(); 647 __ delayed()->nop();
640 if (!aligned) { 648 if (!aligned) {
649 //TODO: copy 8 bytes at one time
641 // 2016/5/8 Jin: only when src and dest have the same alignment can we do lw/sw 650 // 2016/5/8 Jin: only when src and dest have the same alignment can we do lw/sw
642 __ andi(AT, T3, 3); 651 __ andi(AT, T3, 3);
643 __ andi(T9, T0, 3); 652 __ andi(T9, T0, 3);
644 __ bne(AT, T9, l_5); 653 __ bne(AT, T9, l_5);
645 __ delayed()->nop(); 654 __ delayed()->nop();
704 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 713 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
705 // ignored 714 // ignored
706 // name - stub name string 715 // name - stub name string
707 // 716 //
708 // Inputs: 717 // Inputs:
709 // c_rarg0 - source array address 718 // A0 - source array address
710 // c_rarg1 - destination array address 719 // A1 - destination array address
711 // c_rarg2 - element count, treated as ssize_t, can be zero 720 // A2 - element count, treated as ssize_t, can be zero
712 // 721 //
713 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, 722 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
714 // we let the hardware handle it. The one to eight bytes within words, 723 // we let the hardware handle it. The one to eight bytes within words,
715 // dwords or qwords that span cache line boundaries will still be loaded 724 // dwords or qwords that span cache line boundaries will still be loaded
716 // and stored atomically. 725 // and stored atomically.
717 // 726 //
718 address generate_conjoint_byte_copy(bool aligned, const char *name) { 727 address generate_conjoint_byte_copy(bool aligned, const char *name) {
719 Label l_1, l_2, l_3, l_4, l_5; 728 __ align(CodeEntryAlignment);
720 Label l_unaligned, l_aligned; 729 StubCodeMark mark(this, "StubRoutines", name);
721 StubCodeMark mark(this, "StubRoutines", name); 730 address start = __ pc();
722 __ align(CodeEntryAlignment); 731
723 address start = __ pc(); 732 Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
724 address nooverlap_target = aligned ? 733 Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
725 StubRoutines::arrayof_jbyte_disjoint_arraycopy() : 734
726 StubRoutines::jbyte_disjoint_arraycopy(); 735 address nooverlap_target = aligned ?
727 736 StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
728 array_overlap_test(nooverlap_target, 0); 737 StubRoutines::jbyte_disjoint_arraycopy();
729 738
730 __ push(T3); 739 array_overlap_test(nooverlap_target, 0);
731 __ push(T0); 740
732 __ push(T1); 741 const Register from = A0; // source array address
733 __ push(T8); 742 const Register to = A1; // destination array address
734 743 const Register count = A2; // elements count
735 // copy from high to low 744 const Register end_from = T3; // source array end address
736 __ move(T3, A0); 745 const Register end_to = T0; // destination array end address
737 __ move(T0, A1); 746 const Register end_count = T1; // count of bytes not yet copied
738 __ move(T1, A2); 747
739 __ dadd(T3, T3, T1); 748 __ push(end_from);
740 __ dadd(T0, T0, T1); 749 __ push(end_to);
741 750 __ push(end_count);
742 // 2016/5/8 Jin: copy starting unaligned bytes 751 __ push(T8);
743 __ bind(l_unaligned); 752
744 __ beq(T1, R0, l_5); 753 // copy from high to low
745 __ delayed()->nop(); 754 __ move(end_count, count);
746 755 __ dadd(end_from, from, end_count);
747 __ andi(AT, T3, 3); 756 __ dadd(end_to, to, end_count);
748 __ beq(AT, R0, l_aligned); 757
749 __ delayed()->nop(); 758 // 2016/05/08 aoqi: If end_from and end_to have different alignments, a byte-by-byte copy is performed.
750 __ lb(AT, T3, -1); 759 __ andi(AT, end_from, 3);
751 __ sb(AT, T0, -1); 760 __ andi(T8, end_to, 3);
752 __ daddi(AT, T1, -1); 761 __ bne(AT, T8, l_copy_byte);
753 __ daddi(AT, T3, -1); 762 __ delayed()->nop();
754 __ daddi(AT, T0, -1); 763
755 __ b(l_unaligned); 764 // First deal with the unaligned data at the top.
756 __ delayed()->nop(); 765 __ bind(l_unaligned);
757 766 __ beq(end_count, R0, l_exit);
758 // now T0, T3 point to 4-byte aligned high-ends 767 __ delayed()->nop();
759 // T1 contains byte count that is not copied. 768
760 __ bind(l_aligned); 769 __ andi(AT, end_from, 3);
761 770 __ bne(AT, R0, l_from_unaligned);
762 __ move(T8, T1); 771 __ delayed()->nop();
763 __ daddi(AT, T1, -3); 772
764 __ blez(AT, l_3); 773 __ andi(AT, end_to, 3);
765 __ delayed()->nop(); 774 __ beq(AT, R0, l_4_bytes_aligned);
766 775 __ delayed()->nop();
767 __ andi(T8, T8, 3); 776
768 __ lea(T3, Address(T3, -4)); 777 __ bind(l_from_unaligned);
769 __ lea(T0, Address(T0, -4)); 778 __ lb(AT, end_from, -1);
770 779 __ sb(AT, end_to, -1);
771 __ dsrl(T1, T1, 2); 780 __ daddi(end_from, end_from, -1);
772 __ align(16); 781 __ daddi(end_to, end_to, -1);
773 __ bind(l_1); 782 __ daddi(end_count, end_count, -1);
774 __ lw(AT, T3, 0); 783 __ b(l_unaligned);
775 __ sw(AT, T0, 0); 784 __ delayed()->nop();
776 __ addi(T3, T3, -4); 785
777 __ addi(T0, T0, -4); 786 // now end_to, end_from point to 4-byte aligned high-ends
778 __ addi(T1, T1, -1); 787 // end_count contains byte count that is not copied.
779 __ bne(T1, R0, l_1); 788 // copy 4 bytes at a time
780 __ delayed()->nop(); 789 __ bind(l_4_bytes_aligned);
781 __ b(l_3); 790
782 __ delayed()->nop(); 791 __ move(T8, end_count);
783 // copy dwords aligned or not with repeat move 792 __ daddi(AT, end_count, -3);
784 __ bind(l_2); 793 __ blez(AT, l_copy_suffix);
785 __ bind(l_3); 794 __ delayed()->nop();
786 // copy suffix (0-3 bytes) 795
787 __ andi(T8, T8, 3); 796 //__ andi(T8, T8, 3);
788 __ beq(T8, R0, l_5); 797 __ lea(end_from, Address(end_from, -4));
789 __ delayed()->nop(); 798 __ lea(end_to, Address(end_to, -4));
790 __ addi(T3, T3, 3); 799
791 __ addi(T0, T0, 3); 800 __ dsrl(end_count, end_count, 2);
792 __ bind(l_4); 801 __ align(16);
793 __ lb(AT, T3, 0); 802 __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes
794 __ sb(AT, T0, 0); 803 __ lw(AT, end_from, 0);
795 __ addi(T3, T3, -1); 804 __ sw(AT, end_to, 0);
796 __ addi(T0, T0, -1); 805 __ addi(end_from, end_from, -4);
797 __ addi(T8, T8, -1); 806 __ addi(end_to, end_to, -4);
798 __ bne(T8, R0, l_4); 807 __ addi(end_count, end_count, -1);
799 __ delayed()->nop(); 808 __ bne(end_count, R0, l_copy_4_bytes_loop);
800 __ bind(l_5); 809 __ delayed()->nop();
801 __ pop(T8); 810
802 __ pop(T1); 811 __ b(l_copy_suffix);
803 __ pop(T0); 812 __ delayed()->nop();
804 __ pop(T3); 813 // copy dwords aligned or not with repeat move
805 __ jr(RA); 814 // l_copy_suffix
806 __ delayed()->nop(); 815 // copy suffix (0-3 bytes)
807 return start; 816 __ bind(l_copy_suffix);
817 __ andi(T8, T8, 3);
818 __ beq(T8, R0, l_exit);
819 __ delayed()->nop();
820 __ addi(end_from, end_from, 3);
821 __ addi(end_to, end_to, 3);
822 __ bind(l_copy_suffix_loop);
823 __ lb(AT, end_from, 0);
824 __ sb(AT, end_to, 0);
825 __ addi(end_from, end_from, -1);
826 __ addi(end_to, end_to, -1);
827 __ addi(T8, T8, -1);
828 __ bne(T8, R0, l_copy_suffix_loop);
829 __ delayed()->nop();
830
831 __ bind(l_copy_byte);
832 __ beq(end_count, R0, l_exit);
833 __ delayed()->nop();
834 __ lb(AT, end_from, -1);
835 __ sb(AT, end_to, -1);
836 __ daddi(end_from, end_from, -1);
837 __ daddi(end_to, end_to, -1);
838 __ daddi(end_count, end_count, -1);
839 __ b(l_copy_byte);
840 __ delayed()->nop();
841
842 __ bind(l_exit);
843 __ pop(T8);
844 __ pop(end_count);
845 __ pop(end_to);
846 __ pop(end_from);
847 __ jr(RA);
848 __ delayed()->nop();
849 return start;
808 } 850 }
809 851
810 // Arguments: 852 // Arguments:
811 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 853 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
812 // ignored 854 // ignored

mercurial