How google.com deals with China’s Great Firewall
Monday, July 2nd, 2007
The GFW (Great Firewall) in China is a kind of terrible “big brother” that decides whether a web site lives or dies. Besides some websites that have been executed permanently (such as zh.wikipedia.com, etc.), the GFW scans content on the fly; once some content triggers the alarm, the current TCP connection is reset, and the site is blocked for your current IP address for some minutes.
Google is one of the sites that has suffered the most from the GFW. Google launched a self-censored site in China under the domain name google.cn; however, most people are already used to “www.google.com” or have a toolbar installed that uses google.com, and non-technical people can’t tell “google.com” from “google.cn”. As a result, most of the traffic still goes to google.com, that is to say, under the monitoring of the GFW.
Google built a hack into the front page of www.google.com: a small piece of JavaScript that sends an Ajax request and, if that request does not return successfully, redirects the browser to google.cn with the same query string. (I guess only people with a Chinese IP address get this piece of JS code, but I have not confirmed this.)
The JS code is not very readable; obviously Google doesn’t want you to read it… :) But we can still easily understand what it intends to do:
<script>
<!–
// Reader annotations: only the "//" comment lines were added; the quoted code itself is unchanged.
// NOTE(review): the string delimiters in this listing are typographic quotes rather than ASCII
// quotes (presumably converted when the post was published), so it will not parse as pasted.
// NOTE(review): nothing visible in this listing issues an XMLHttpRequest or refers to google.cn;
// the check-and-redirect behaviour described in the surrounding text is not shown here.
//
// window.google - page-level object holding the kEI, kEXPI and kHL string fields.
// sf()          - moves keyboard focus to document.f.q (presumably the search box; not shown here).
window.google={kEI:”7dmIRuzeIJOesAKC0pnIDw”,kEXPI:”17259,17497,17585″,kHL:”en”};function sf(){document.f.q.focus();}
// window.rwt(b,d,f,j,k,g,l) - rewrites b.href into a "/url?sa=t..." link that carries the original
//   URL (its "#fragment" is split off and re-appended at the end), the optional oi/cad/usg
//   parameters, ct, cd and a fixed ei value; it then clears b.onmousedown and returns true.
// window.gbar (aliased as m inside the anonymous function that follows):
//   C(a,d,b) - registers handler b for event d on a via addEventListener, else attachEvent,
//              else by wrapping whatever handler is already stored in a["on"+d].
//   r(a)     - turns "+" into spaces, unescapes, escapes again, then encodes any "+" as %2B.
//   v(a)     - true when the first child of i[a] is an A element.
//   o(a)     - matches the parameter named a inside location.search.
//   m.init   - appends each window.navExtra entry to the "gbar" element inside a "gb2" div,
//              collects q / as_q / near and a fixed list of other parameters from the query
//              string, calls B for every div whose first child is a link, and registers
//              m.close for clicks on the document.
//   B(a)     - rebuilds the href of the link in i[a]: adds "tab=" plus two letters taken from
//              the string t and, depending on the letter, either swaps "http:" for the current
//              protocol or appends the carried-over parameters and the query.
//   x(a,d,b) - toggles a.display between "block" and "none" and sets a.left / a.top in px.
//   m.tg     - creates the "gbi" iframe/div on first use, then toggles and positions it and
//              the "gb2" divs relative to the "gb3" div.
//   m.close  - calls m.tg when the "gbi" element is currently displayed.
window.rwt=function(b,d,f,j,k,g,l){var a=window.encodeURIComponent?encodeURIComponent:escape,h=”",i=”",c=b.href.split(”#”),e=”";if(d){h=”&oi=”+a(d)}if(f){i=”&cad=”+a(f)}if(g){e=”&usg=”+g}b.href=”/url?sa=t”+h+i+”&ct=”+a(j)+”&cd=”+a(k)+”&url=”+a(c[0]).replace(/\+/g,”%2B”)+”&ei=7dmIRuzeIJOesAKC0pnIDw”+e+l+(c[1]?”#”+c[1]:”");b.onmousedown=”";return true};window.gbar={};(function(){function C(a,d,b){var c=”on”+d;if(a.addEventListener){a.addEventListener(d,b,false)}else if(a.attachEvent){a.attachEvent(c,b)}else{var g=a[c];a[c]=function(){var f=g.apply(this,arguments),h=b.apply(this,arguments);return f==undefined?h:(h==undefined?f:h&&f)}}};var k=window,u=k.location,A=u.search,z=u.protocol,j=document,q=”appendChild”,m=k.gbar,l=”",s,w=”",e,i,n,t=”wivnlmzbjpcoegz0tqfys”;function r(a){return escape(unescape(a.replace(/\+/g,” “))).replace(/\+/g,”%2B”)}function v(a){return i[a].firstChild.tagName==”A”}function o(a){return A.match(”[?&](”+a+”=)([^&#]*)”)}m.init=function(){var a=0,d,b=”affdom,channel,client,hl,hs,ie,lr,ned,oe,og,rls,rlz”.split(”,”),c=o(”as_q”),g=o(”q”),f=j.getElementById(”gbar”),h=k.navExtra;s=o(”near”);if(h){for(var y in h){var p=j.createElement(”div”);p[q](h[y]);p.className=”gb2″;f[q](p)}}i=f.getElementsByTagName(”div”);g&&(l=g[2])&&c&&(l+=”+”);c&&(l+=c[2]);while(v(a++)){}n=t.charAt(a-1);for(a=0;b[a];a++){d=o(b[a]);d&&(w+=”&”+d[1]+r(d[2]))}for(a=0;i[a];a++){v(a)&&B(a)}C(j,”click”,m.close)};function B(a){var d=i[a].firstChild,b=d.href+(d.href.match(”[?]“)?”&”:”?”),c=t.charAt(a);if(c!=”z”){b+=”tab=”+n+c;if(”com”.indexOf(c)>=0){b=b.replace(”http:”,z)}else{b+=w;if(l){b+=”&q=”+r(l);s&&n==”l”&&(b+=”+near%3A+”+r(s[2]))}}}d.href=b}function x(a,d,b){a.display=a.display==”block”?”none”:”block”;a.left=d+”px”;a.top=b+”px”}m.tg=function(a){var 
d=0,b,c,g,f=0;a=a?a:k.event;a.cancelBubble=true;if(!e){e=j.createElement(Array.every||k.createPopup?”iframe”:”div”);e.frameBorder=”0″;e.id=”gbi”;e.scrolling=”no”;e.src=”#”;j.body[q](e)}for(;n&&i[f];f++){c=i[f];g=c.className;if(g==”gb3″){b=c.offsetLeft;while(c=c.offsetParent){b+=c.offsetLeft}x(e.style,b,24)}else if(g==”gb2″){c.id==”gbar”+n&&(c.style.padding=”.2em .5em”);x(c.style,b+1,25+d);d+=20}}e.style.height=d+”px”};m.close=function(a){e&&e.style.display==”block”&&m.tg(a)};})();// –>
</script>
Does this work well? Unfortunately not; in most cases it does not work well. Why? Because the GFW’s response is not as fast as expected. In most cases, if the query result contains something the GFW thinks is “bad”, it is not able to stop you on the first page; that is to say, the first result page will be shown correctly, but you will not be able to go on to the next pages. Google’s JavaScript makes a call to check first, but that call may return successfully before Google gets blocked, so the user will still get an error page and will not be able to access Google for the next several minutes.
Popularity: 15% [?]
About