src/cpu/mips/vm/stubGenerator_mips_64.cpp

changeset 117
89e1dfe996be
parent 103
58408aa75fba
child 118
bf4b1d1988a6
equal deleted inserted replaced
107:68d7c979cca6 117:89e1dfe996be
627 // 627 //
628 // Side Effects: 628 // Side Effects:
629 // disjoint_byte_copy_entry is set to the no-overlap entry point 629 // disjoint_byte_copy_entry is set to the no-overlap entry point
630 // used by generate_conjoint_byte_copy(). 630 // used by generate_conjoint_byte_copy().
631 // 631 //
632 address generate_disjoint_byte_copy(bool aligned, const char *name) { 632 address generate_disjoint_byte_copy(bool aligned, const char * name) {
633 StubCodeMark mark(this, "StubRoutines", name); 633 StubCodeMark mark(this, "StubRoutines", name);
634 __ align(CodeEntryAlignment); 634 __ align(CodeEntryAlignment);
635 address start = __ pc(); 635
636 Label l_0, l_1, l_2, l_3, l_4, l_5, l_6; 636
637 637 Register tmp1 = T0;
638 __ push(T3); 638 Register tmp2 = T1;
639 __ push(T0); 639 Register tmp3 = T3;
640 __ push(T1); 640
641 __ push(T8); 641 address start = __ pc();
642 __ move(T3, A0); 642
643 __ move(T0, A1); 643 __ push(tmp1);
644 __ move(T1, A2); 644 __ push(tmp2);
645 __ move(T8, T1); // original count in T1 645 __ push(tmp3);
646 __ daddi(AT, T1, -3); 646 __ move(tmp1, A0);
647 __ blez(AT, l_4); 647 __ move(tmp2, A1);
648 __ delayed()->nop(); 648 __ move(tmp3, A2);
649 if (!aligned) { 649
650 //TODO: copy 8 bytes at one time 650
651 // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */ 651 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11;
652 __ andi(AT, T3, 3); 652 Label l_debug;
653 __ andi(T9, T0, 3); 653
654 __ bne(AT, T9, l_5); 654 __ daddi(AT, tmp3, -9); // TODO: why is the threshold 9? (counts <= 9 go straight to the byte loop)
655 __ delayed()->nop(); 655 __ blez(AT, l_9);
656 656 __ delayed()->nop();
657 // align source address at dword address boundary 657
658 __ move(T1, 4); 658 if (!aligned) {
659 __ sub(T1, T1, T3); 659 __ xorr(AT, tmp1, tmp2);
660 __ andi(T1, T1, 3); 660 __ andi(AT, AT, 1);
661 __ beq(T1, R0, l_1); 661 __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy
662 __ delayed()->nop(); 662 __ delayed()->nop();
663 __ sub(T8,T8,T1); 663
664 __ bind(l_0); 664 __ andi(AT, tmp1, 1);
665 __ lb(AT, T3, 0); 665 __ beq(AT, R0, l_10); // copy 1 element if necessary to align to 2 bytes
666 __ sb(AT, T0, 0); 666 __ delayed()->nop();
667 __ addi(T3, T3, 1); 667
668 __ addi(T0, T0, 1); 668 __ lb(AT, tmp1, 0);
669 __ addi(T1 ,T1, -1); 669 __ daddi(tmp1, tmp1, 1);
670 __ bne(T1, R0, l_0); 670 __ sb(AT, tmp2, 0);
671 __ delayed()->nop(); 671 __ daddi(tmp2, tmp2, 1);
672 __ bind(l_1); 672 __ daddi(tmp3, tmp3, -1);
673 __ move(T1, T8); 673 __ bind(l_10);
674 } 674
675 __ shr(T1, 2); 675 __ xorr(AT, tmp1, tmp2);
676 __ beq(T1, R0, l_4); // no dwords to move 676 __ andi(AT, AT, 3);
677 __ delayed()->nop(); 677 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy
678 // copy aligned dwords 678 __ delayed()->nop();
679 __ bind(l_2); 679
680 __ align(16); 680 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
681 __ bind(l_3); 681
682 __ lw(AT, T3, 0); 682 // Copy 2 elements if necessary to align to 4 bytes.
683 __ sw(AT, T0, 0 ); 683 __ andi(AT, tmp1, 3);
684 __ addi(T3, T3, 4); 684 __ beq(AT, R0, l_2);
685 __ addi(T0, T0, 4); 685 __ delayed()->nop();
686 __ addi(T1, T1, -1); 686
687 __ bne(T1, R0, l_3); 687 __ lhu(AT, tmp1, 0);
688 __ delayed()->nop(); 688 __ daddi(tmp1, tmp1, 2);
689 __ bind(l_4); 689 __ sh(AT, tmp2, 0);
690 __ move(T1, T8); 690 __ daddi(tmp2, tmp2, 2);
691 __ andi(T1, T1, 3); 691 __ daddi(tmp3, tmp3, -2);
692 __ beq(T1, R0, l_6); 692 __ bind(l_2);
693 __ delayed()->nop(); 693
694 // copy suffix 694 // At this point the positions of both, from and to, are at least 4 byte aligned.
695 __ bind(l_5); 695
696 __ lb(AT, T3, 0); 696 // Copy 4 elements at a time.
697 __ sb(AT, T0, 0); 697 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
698 __ addi(T3, T3, 1); 698 __ xorr(AT, tmp1, tmp2);
699 __ addi(T0, T0, 1); 699 __ andi(AT, AT, 7);
700 __ addi(T1, T1, -1); 700 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 4 bytes at a time; 8-byte accesses would be unaligned for either from or to
701 __ bne(T1, R0, l_5 ); 701 __ delayed()->nop();
702 __ delayed()->nop(); 702
703 __ bind(l_6); 703 // Copy 4 elements if necessary to align to 8 bytes.
704 __ pop(T8); 704 __ andi(AT, tmp1, 7);
705 __ pop(T1); 705 __ beq(AT, R0, l_7);
706 __ pop(T0); 706 __ delayed()->nop();
707 __ pop(T3); 707
708 __ jr(RA); 708 __ lw(AT, tmp1, 0);
709 __ delayed()->nop(); 709 __ daddi(tmp3, tmp3, -4);
710 return start; 710 __ sw(AT, tmp2, 0);
711 { // FasterArrayCopy
712 __ daddi(tmp1, tmp1, 4);
713 __ daddi(tmp2, tmp2, 4);
714 }
715 }
716
717 __ bind(l_7);
718
719 // Copy 8 elements (bytes) at a time; when aligned == false, both from
720 // and to have been brought to 8-byte alignment by the code above.
721
722 { // FasterArrayCopy
723 __ daddi(AT, tmp3, -7);
724 __ blez(AT, l_6); // fewer than 8 elements remain -> copy 4 at a time
725 __ delayed()->nop();
726
727 __ bind(l_8);
728 // For Loongson, there is 128-bit memory access. TODO
729 __ ld(AT, tmp1, 0);
730 __ sd(AT, tmp2, 0);
731 __ daddi(tmp1, tmp1, 8);
732 __ daddi(tmp2, tmp2, 8);
733 __ daddi(tmp3, tmp3, -8);
734 __ daddi(AT, tmp3, -8);
735 __ bgez(AT, l_8);
736 __ delayed()->nop();
737 }
738 __ bind(l_6);
739
740 // copy 4 bytes at a time
741 { // FasterArrayCopy
742 __ daddi(AT, tmp3, -3);
743 __ blez(AT, l_1);
744 __ delayed()->nop();
745
746 __ bind(l_3);
747 __ lw(AT, tmp1, 0);
748 __ sw(AT, tmp2, 0);
749 __ daddi(tmp1, tmp1, 4);
750 __ daddi(tmp2, tmp2, 4);
751 __ daddi(tmp3, tmp3, -4);
752 __ daddi(AT, tmp3, -4);
753 __ bgez(AT, l_3);
754 __ delayed()->nop();
755
756 }
757
758 // do 2 bytes copy
759 __ bind(l_1);
760 {
761 __ daddi(AT, tmp3, -1);
762 __ blez(AT, l_9);
763 __ delayed()->nop();
764
765 __ bind(l_5);
766 __ lhu(AT, tmp1, 0);
767 __ daddi(tmp3, tmp3, -2);
768 __ sh(AT, tmp2, 0);
769 __ daddi(tmp1, tmp1, 2);
770 __ daddi(tmp2, tmp2, 2);
771 __ daddi(AT, tmp3, -2);
772 __ bgez(AT, l_5);
773 __ delayed()->nop();
774 }
775
776 //do 1 element copy--byte
777 __ bind(l_9);
778 __ beq(R0, tmp3, l_4);
779 __ delayed()->nop();
780
781 {
782 __ bind(l_11);
783 __ lb(AT, tmp1, 0);
784 __ daddi(tmp3, tmp3, -1);
785 __ sb(AT, tmp2, 0);
786 __ daddi(tmp1, tmp1, 1);
787 __ daddi(tmp2, tmp2, 1);
788 __ daddi(AT, tmp3, -1);
789 __ bgez(AT, l_11);
790 __ delayed()->nop();
791 }
792
793 __ bind(l_4);
794 __ pop(tmp3);
795 __ pop(tmp2);
796 __ pop(tmp1);
797
798 __ jr(RA);
799 __ delayed()->nop();
800
801 return start;
711 } 802 }
712 803
713 // Arguments: 804 // Arguments:
714 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 805 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
715 // ignored 806 // ignored

mercurial