bn+leaky relu虐我千百遍,搞定!(cudnn)

bn+leaky relu虐我千百遍,搞定!(cudnn) 等待bn+tanh激活稳定了,腾出手来研究bn+leaky relu。先不那么复杂:最初只能用一个bn层时,是可以用relu和leaky relu的(当初就没想到用tanh激活),所以把程序网络砍到最简单方式——一个bn层,也不用残差,如下://使用padding方式layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 5, 32, 32, 32, 3, 1, 1));// layers.emplace_back(std::make_shared<BN>(cudnn, batch, 32, 32, 32));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 32, 32, 32));layers.emplace_back(std::make_shared<MaxPool2D>(cudnn, batch, 32, 32, 32, 2, 2, 0, 2));layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 32, 64, 16, 16, 3, 1, 1));layers.emplace_back(std::make_shared<BN>(cudnn, batch, 64, 16, 16));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 64, 16, 16));layers.emplace_back(std::make_shared<MaxPool2D>(cudnn, batch, 64, 16, 16, 2, 2, 0, 2));layers.emplace_back(std::make_shared<Linear>(cublas, batch, 64*8*8, 500));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 500, 1, 1));layers.emplace_back(std::make_shared<Linear>(cublas, batch, 500, 10));然后添加到两个bn(如上,取消注释),马上程序就翻车;relu情形改成leaky relu也不行。从上面看,除了bn层,其他没什么问题,所以尝试bn层一些参数的修改——守株待兔真撞上了:bn层更新中删除这两个参数的动量更新,立马起作用,leaky relu可以了:axpy_kernel<<<(w_size + 255) / 256, 256>>>(w_size, -lr, d_gradBias, 0, 1, d_bias, 0, 1);error_handling(cudaPeekAtLastError());//scale_kernel<<<(w_size + 255) / 256, 256>>>(w_size, 0.9f, d_gradBias, 1);//error_handling(cudaPeekAtLastError());axpy_kernel<<<(w_size + 255) / 256, 256>>>(w_size, -lr, d_gradScale, 0, 1, d_scale, 0, 1);error_handling(cudaPeekAtLastError());/* scale_kernel<<<(w_size + 255) / 256, 256>>>(w_size, 0.9f, d_gradScale, 1);error_handling(cudaPeekAtLastError());*/所以还原到基本盘,即最简单情形,最容易解决问题:两个bn层leaky relu成功了。顺便把残差加入:两个残差中各有一个bn层,运行失败;lr由0.018更改为0.001,ok,成功。好,开始尝试六个bn层,即残差中两个conv卷积都配上bn层(所以残差一共又加四个bn层),运行又翻车:跑到第十轮方差太大,到了八九千,崩溃了。原来第二个残差前用了一个5*5的卷积核,改成3*3好一些,但是十二三轮方差还会太大而崩溃,所以就运行10轮,测试能跑50多分,这也算成功了,别看分数低。然后发现残差层中两个卷积用3*3、3*3或者用1*1、3*3也有巧妙之处:第二个残差用3*3、3*3,第一个残差用1*1、3*3,竟然可以用6个bn层——10轮成功,15轮成功,20轮也成功,方差也控制在1000以内。还是简单好:调试参数也少,干扰也少。下面是运行结果:train Classification result: 83.82% ok (used 49984 images)均值: 0.6118095517,方差:0.0305565596均值: 0.2793858945,方差:0.0053950339均值: 
0.3383760750,方差:0.0150984041均值: -0.0055495882,方差:0.0045621828均值: 0.1230476499,方差:0.0189078171均值: -0.3529624045,方差:0.0349509269均值: 0.2177584618,方差:0.0206815433均值: 0.2719434202,方差:0.0082078036均值: -0.1685208082,方差:0.0195145272均值: -0.0787319839,方差:0.0039726906均值: 0.0138385594,方差:0.0060876021均值: 0.1236321479,方差:0.0087135769均值: -0.1566630453,方差:0.0022761899均值: -0.5770806074,方差:0.0107879424均值: 0.0708066970,方差:0.0203303955均值: -0.4398427308,方差:0.0096681817均值: 0.1757275909,方差:0.0087454943均值: 0.0271942466,方差:0.0139336865均值: 0.0941561088,方差:0.0079159336均值: -0.3148907721,方差:0.0140706869均值: 0.2468893826,方差:0.0092326524均值: 0.0818228200,方差:0.0087158987均值: 0.1561099738,方差:0.0074163754均值: -0.4497804046,方差:0.0433151573均值: 0.1684067398,方差:0.0111323437均值: 0.0006642357,方差:0.0076282038均值: -0.0134149687,方差:0.0151203573均值: -0.3764256239,方差:0.0696884915均值: 0.1404187977,方差:0.0310208201均值: 0.2501972914,方差:0.0145718530均值: -0.5813394189,方差:0.0636308864均值: -0.0528681763,方差:0.0053213448rb均值: -8.4806413651,rb方差:101.411819458008rb均值: -11.4137248993,rb方差:122.408134460449rb均值: 12.3597068787,rb方差:317.574218750000rb均值: -52.9603614807,rb方差:523.926208496094rb均值: -0.8453392386,rb方差:116.965484619141rb均值: 22.9101257324,rb方差:46.707012176514rb均值: 24.4838714600,rb方差:224.510467529297rb均值: -24.5759201050,rb方差:63.472591400146rb均值: -0.4204456508,rb方差:238.861862182617rb均值: 26.5995273590,rb方差:84.085540771484rb均值: 15.8835315704,rb方差:195.695297241211rb均值: 28.1972332001,rb方差:93.929595947266rb均值: 23.4342708588,rb方差:53.039161682129rb均值: 21.7705802917,rb方差:279.336669921875rb均值: 14.9386043549,rb方差:35.861545562744rb均值: 8.3071842194,rb方差:120.865402221680rb均值: 5.6488070488,rb方差:170.923965454102rb均值: -12.9310369492,rb方差:379.897521972656rb均值: 34.9000511169,rb方差:356.919525146484rb均值: -8.8135833740,rb方差:163.469909667969rb均值: 6.4712519646,rb方差:35.504989624023rb均值: 16.2970867157,rb方差:60.593471527100rb均值: 28.6945095062,rb方差:112.165588378906rb均值: 7.8517494202,rb方差:233.192047119141rb均值: -30.9962787628,rb方差:245.233276367188rb均值: 
21.0381984711,rb方差:131.946334838867rb均值: 19.8696365356,rb方差:84.784614562988rb均值: -14.2001428604,rb方差:70.479873657227rb均值: 7.4040813446,rb方差:119.022712707520rb均值: 5.8504438400,rb方差:92.035018920898rb均值: 37.3318634033,rb方差:204.922729492188rb均值: 0.9926319122,rb方差:38.232997894287rb均值: -9.7257099152,rb方差:112.572578430176rb均值: 29.9090862274,rb方差:149.066635131836rb均值: -5.1698660851,rb方差:122.542869567871rb均值: 6.5652356148,rb方差:142.803054809570rb均值: 23.8881015778,rb方差:272.558563232422rb均值: -7.7413511276,rb方差:165.707977294922rb均值: 28.6822834015,rb方差:139.270294189453rb均值: 14.4575109482,rb方差:174.076141357422rb均值: -4.2465724945,rb方差:76.789413452148rb均值: 2.6537570953,rb方差:100.457107543945rb均值: 7.4661836624,rb方差:173.971725463867rb均值: -15.7740859985,rb方差:112.883636474609rb均值: 15.6900796890,rb方差:205.756454467773rb均值: 3.7621457577,rb方差:140.911560058594rb均值: 24.8136711121,rb方差:138.023254394531rb均值: 16.6098937988,rb方差:152.412887573242rb均值: -28.0669155121,rb方差:178.396530151367rb均值: 3.8906049728,rb方差:161.419616699219rb均值: 24.8428611755,rb方差:215.306030273438rb均值: 46.0853462219,rb方差:271.363037109375rb均值: -26.8367481232,rb方差:200.552185058594rb均值: 10.2039203644,rb方差:74.181938171387rb均值: 3.6904249191,rb方差:213.065368652344rb均值: 20.4676151276,rb方差:171.363845825195rb均值: -1.0041704178,rb方差:70.853698730469rb均值: 10.6889600754,rb方差:116.365386962891rb均值: 1.7627170086,rb方差:160.810485839844rb均值: -1.7885169983,rb方差:242.990524291992rb均值: 5.0322575569,rb方差:227.469360351563rb均值: 30.9113197327,rb方差:199.812057495117rb均值: -11.6709108353,rb方差:159.031906127930rb均值: 17.8245544434,rb方差:230.429580688477均值: -14.3063440323,方差:212.6967315674均值: -4.2473783493,方差:239.5746765137均值: 4.7482366562,方差:322.2791137695均值: 36.6637802124,方差:217.3679809570均值: 8.3078842163,方差:175.8049926758均值: -41.3049545288,方差:333.8974304199均值: 2.9841489792,方差:198.7082672119均值: -7.1169891357,方差:292.8938903809均值: 5.8974962234,方差:144.7068481445均值: -9.9190168381,方差:265.7385864258均值: 2.6529972553,方差:160.6676483154均值: 
-3.9148793221,方差:323.2657165527均值: -11.3659296036,方差:278.5631408691均值: 2.2589273453,方差:88.2759399414均值: 15.5458183289,方差:152.2501373291均值: 30.6713314056,方差:364.7858276367均值: 4.1193246841,方差:247.5183563232均值: 13.4563055038,方差:156.3102111816均值: -0.8886789083,方差:234.7144165039均值: -1.7322585583,方差:177.3148498535均值: -8.3087692261,方差:202.6070404053均值: -9.4661026001,方差:209.3522491455均值: -7.1613726616,方差:130.7668304443均值: -5.2931704521,方差:205.0260162354均值: -7.0488700867,方差:212.8831787109均值: -1.3651547432,方差:216.9725952148均值: 6.8491601944,方差:134.7440643311均值: -14.7775821686,方差:226.5520782471均值: 19.5953731537,方差:135.3577880859均值: -2.5836834908,方差:262.4497985840均值: -16.1943855286,方差:210.7336730957均值: 12.9428796768,方差:109.2070846558rb均值: -2.6685829163,rb方差:250.382232666016rb均值: 4.8719229698,rb方差:260.582672119141rb均值: 13.1458568573,rb方差:354.083251953125rb均值: -5.9272623062,rb方差:156.465911865234rb均值: -0.2539663613,rb方差:328.437805175781rb均值: 4.4516682625,rb方差:191.338668823242rb均值: 4.9977560043,rb方差:209.526214599609rb均值: 13.9853897095,rb方差:167.047912597656rb均值: 8.7264404297,rb方差:240.878616333008rb均值: 17.3459320068,rb方差:192.378524780273rb均值: -2.1612541676,rb方差:160.447204589844rb均值: 1.7245656252,rb方差:183.805892944336rb均值: 23.8260765076,rb方差:274.420471191406rb均值: 6.9794116020,rb方差:194.489974975586rb均值: 15.3016529083,rb方差:256.790618896484rb均值: 1.4309482574,rb方差:375.432128906250rb均值: 15.2151966095,rb方差:286.015167236328rb均值: -3.5926125050,rb方差:203.680389404297rb均值: 11.3979730606,rb方差:224.947006225586rb均值: -5.0777764320,rb方差:185.275207519531rb均值: 8.8142852783,rb方差:192.851196289063rb均值: -3.9396216869,rb方差:186.560379028320rb均值: 2.5894513130,rb方差:244.162246704102rb均值: -16.8359432220,rb方差:130.831741333008rb均值: -6.1615014076,rb方差:249.044586181641rb均值: -2.2876145840,rb方差:177.025787353516rb均值: 16.7811927795,rb方差:338.273254394531rb均值: 9.8503847122,rb方差:274.422393798828rb均值: 0.6609696150,rb方差:187.040588378906rb均值: -4.7715773582,rb方差:185.646545410156rb均值: -2.2064201832,rb方差:215.495895385742rb均值: 
-16.4317283630,rb方差:246.272476196289rb均值: -16.3173217773,rb方差:559.041931152344rb均值: -23.1658096313,rb方差:639.483093261719rb均值: 24.2976245880,rb方差:308.769989013672rb均值: 10.7532100677,rb方差:315.648193359375rb均值: 2.6104147434,rb方差:399.915588378906rb均值: 12.8525791168,rb方差:636.379821777344rb均值: 4.2293353081,rb方差:601.880432128906rb均值: -7.4254078865,rb方差:401.565979003906rb均值: -5.3762321472,rb方差:331.994720458984rb均值: -2.7980070114,rb方差:392.799896240234rb均值: 4.7686576843,rb方差:484.794281005859rb均值: 19.5441951752,rb方差:573.917480468750rb均值: 0.9318694472,rb方差:483.537445068359rb均值: -25.3258953094,rb方差:507.756256103516rb均值: -6.8600959778,rb方差:299.185821533203rb均值: -37.0693588257,rb方差:625.283935546875rb均值: 11.6823406219,rb方差:391.826263427734rb均值: -18.8439788818,rb方差:535.221313476563rb均值: -19.0878314972,rb方差:448.953216552734rb均值: -0.6513226032,rb方差:376.679718017578rb均值: 2.8856112957,rb方差:449.546173095703rb均值: -10.7838602066,rb方差:756.389099121094rb均值: -13.5816154480,rb方差:410.006195068359rb均值: 5.8713550568,rb方差:461.585266113281rb均值: -7.2429766655,rb方差:367.359100341797rb均值: -11.7004413605,rb方差:386.579010009766rb均值: 7.1214933395,rb方差:355.404357910156rb均值: -14.0512533188,rb方差:392.533538818359rb均值: -6.2349352837,rb方差:359.201660156250rb均值: -4.8889312744,rb方差:399.811767578125rb均值: -3.4916837215,rb方差:698.075012207031rb均值: -2.3244826794,rb方差:387.817810058594learn rate0.001轮次19时间: 2240.975098 msTest Classification result: 64.60% ok (used 9984 images)请按任意键继续. . 
.残差里边的bn层方差很大,说明残差的引入对数据扰动还是很大的。下面是定性的网络结构,成功运行6bn+leaky relu;虽然成绩没有6bn+tanh激活高,但也算成功了。网络小,tanh与leaky relu运行速度上差别不大://使用padding方式layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 5, 32, 32, 32, 3, 1, 1));layers.emplace_back(std::make_shared<BN>(cudnn, batch, 32, 32, 32));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 32, 32, 32));layers.emplace_back(std::make_shared<residual>(cublas, cudnn, batch));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 32, 32, 32));layers.emplace_back(std::make_shared<MaxPool2D>(cudnn, batch, 32, 32, 32, 2, 2, 0, 2)); //s4,16*8*8-16*4*4layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 32, 64, 16, 16, 3, 1, 1));layers.emplace_back(std::make_shared<BN>(cudnn, batch, 64, 16, 16));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 64, 16, 16));layers.emplace_back(std::make_shared<residualExt2>(cudnn, batch, 64, 16, 16));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 64, 16, 16));layers.emplace_back(std::make_shared<MaxPool2D>(cudnn, batch, 64, 16, 16, 2, 2, 0, 2));layers.emplace_back(std::make_shared<Linear>(cublas, batch, 64*64, 500));// layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 64, 500, 7, 7, 7, 1));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 500, 1, 1));layers.emplace_back(std::make_shared<Linear>(cublas, batch, 500, 10));//84-10你别看用的是relu字面,内层其实被leaky relu替换了。2个残差如下:class residual : public Layer {public:residual(cublasHandle_t cublas_, cudnnHandle_t cudnn_, int batch_) :cublas(cublas_), cudnn(cudnn_), batch(batch_) {//使用了数字常量,这个残差只能用一次 202602091332//尝试残差,此处要记住输入Xlayers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 32, 32, 32, 32, 1, 1));//c3,6*12*12-16*8*8layers.emplace_back(std::make_shared<BN>(cudnn, batch, 32, 32, 32));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 32, 32, 32)); //c3,6*12*12-16*8*8layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 32, 32, 32, 32, 3, 1, 1));layers.emplace_back(std::make_shared<BN>(cudnn, batch, 32, 32, 32));。。。。。}class residualExt2 : public Layer 
{public:residualExt2(cudnnHandle_t cudnn_, int batch_, int c, int h, int w) : cudnn(cudnn_), batch(batch_), _c(c), _h(h), _w(w) {//使用了数字常量,这个残差只能用一次 202602091332//尝试残差,此处要记住输入Xlayers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, _c, _c, _h, _w, 3, 1, 1));//c3,6*12*12-16*8*8layers.emplace_back(std::make_shared<BN>(cudnn, batch, _c, _h, _w));layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, _c, _h, _w)); //c3,6*12*12-16*8*8layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, _c, _c, _h, _w, 3, 1, 1));layers.emplace_back(std::make_shared<BN>(cudnn, batch, _c, _h, _w));。。。。。。}注:其实觉得卷积+bn方式可以把卷积的bias全部删除,尝试后还是有bias比较好。前面用he init参数成功,也可以改回原来的初始化试一试——人都是一样的,无路可走时会不自信起来:init_uniform<<<(in_features*out_features + 255) / 256, 256>>>(weight, in_features*out_features, 1, -0.05f, 0.05f);//先统一设置为1init_uniform<<<(out_features + 255) / 256, 256>>>(bias, out_features, 1, -0.05f, 0.05f);