// User:Bawolff/sanbox/leadGenerator.js

importScript('user:Bawolff/mwapilib2.js');

// Create the shared namespace object unless another script already did.
if (!window.Bawolff) {
    window.Bawolff = {};
}

// Example of the kind of API query used to inspect the current leads:
// http://en.wikinews.org/w/api.php?action=query&prop=revisions&titles=template:Lead%20article%201|template:Lead%20article%202|template:Lead%20article%203|template:Lead%20article%204|template:Lead%20article%205&rvprop=timestamp|content

// Entry point: build the lead template wikitext for an article.
//
// Call as Bawolff.leadGen(pageName, 1, alert)
//   title         - name of the article page
//   leadNumb      - which lead to generate (1-5)
//   callback      - lifted at the end of the chain together with title
//   summaryMethod - optional, numeric 0-4; forwarded to Bawolff.leadGen.extract
//
// NOTE(review): getPage and exec are read as plain properties, not called.
// Presumably mwapilib2 defines them as getters - verify against that library.
Bawolff.leadGen = function (title, leadNumb, callback, summaryMethod) {
    var fetched = api(title).getPage;
    var extracted = fetched.lift(Bawolff.leadGen.extract, title, summaryMethod);
    var rendered = extracted.lift(Bawolff.leadGen.create, leadNumb);
    var delivered = rendered.lift(callback, title);
    delivered.exec;
};

/********
This is a list that maps categories/infoboxes to generic images.

Note: the category map only works with categories explicitly included.
      It does not count categories included by templates.
Note: this does not consider templates with parameters.

Keys are template or category names with the first letter capitalised;
values are image file names without the namespace prefix.
*********/
Bawolff.leadGen.imgMap = {
    "UK": "Flag of the United Kingdom.svg",
    "United States": "Flag of the United States.svg",
    "Canada": "Flag of Canada.svg",
    "Computing": "Computer-aj aj ashton 01.svg",
    "Google": "Google logo png.png",
    "Obituary": "Wikinews tag obituary.png",
    "Science and technology infobox": "Science-symbol-2.svg",
    "Science and technology": "Science-symbol-2.svg",
    "ASEAN": "LocationASEAN.png",
    "Afghanistan": "LocationAfghanistan.png",
    "Albania": "LocationAlbania.png",
    "Algeria": "LocationAlgeria.png",
    "Andorra": "LocationAndorra.png",
    "Angola": "LocationAngola.png",
    "Antarctica": "LocationAntarctica.png",
    "Argentina": "LocationArgentina.png",
    "Armenia": "LocationArmenia.png",
    "Australia": "LocationAustralia.png",
    "Austria": "LocationAustria.png",
    "Bahrain": "LocationBahrain.png",
    "Bahamas": "LocationBahamas.png",
    "Barbados": "LocationBarbados.png",
    "Belarus": "LocationBelarus.png",
    "Belgium": "LocationBelgium.png",
    "Bangladesh": "LocationBangladesh.png",
    "Azerbaijan": "LocationAzerbaijan.png",
    "Belize": "LocationBelize.png",
    "Benin": "LocationBenin.png",
    "Bhutan": "LocationBhutan.png",
    "Brazil": "LocationBrazil.png",
    "Brunei": "LocationBrunei.png",
    "Bolivia": "LocationBolivia.png",
    "Botswana": "LocationBotswana.png",
    "Burkina Faso": "LocationBurkinaFaso.png",
    "Burundi": "LocationBurundi.png",
    "Bulgaria": "LocationBulgaria.png",
    "Cameroon": "LocationCameroon.png",
    "Chad": "LocationChad.png",
    "Cape Verde": "LocationCapeVerde.png",
    "Central African Republic": "LocationCentralAfricanRepublic.png",
    "Chile": "LocationChile.png",
    "China": "LocationChina.png",
    "Colombia": "LocationColombia.png",
    "Comoros": "LocationComoros.png",
    "Croatia": "LocationCroatia.png",
    "Costa Rica": "LocationCostaRica.png",
    "Cuba": "LocationCuba.png",
    "Cyprus": "LocationCyprus.png",
    "Czech Republic": "LocationCzechRepublic.png",
    "Denmark": "LocationDenmark.png",
    "Djibouti": "LocationDjibouti.png",
    "Dominica": "LocationDominica.png",
    "Dominican Republic": "LocationDominicanRepublic.png",
    "Egypt": "LocationEgypt.png",
    "Ecuador": "LocationEcuador.png",
    "East Timor": "LocationEastTimor.png",
    "El Salvador": "LocationElSalvador.png",
    "Equatorial Guinea": "LocationEquatorialGuinea.png",
    "Eritrea": "LocationEritrea.png",
    "Estonia": "LocationEstonia.png",
    "Fiji": "LocationFiji.png",
    "Finland": "LocationFinland.png",
    "Ethiopia": "LocationEthiopia.png",
    "France": "LocationFrance.png",
    "Gabon": "LocationGabon.png",
    "Gambia": "LocationGambia.png",
    "Georgia": "LocationGeorgia.png",
    "Germany": "LocationGermany.png",
    "Ghana": "LocationGhana.png",
    "Greece": "LocationGreece.png",
    "Grenada": "LocationGrenada.png",
    "Guatemala": "LocationGuatemala.png",
    "Guinea": "LocationGuinea.png",
    "Guyana": "LocationGuyana.png",
    "Haiti": "LocationHaiti.png",
    "Honduras": "LocationHonduras.png",
    "Hungary": "LocationHungary.png",
    "Iceland": "LocationIceland.png",
    "Indonesia": "LocationIndonesia.png",
    "Iran": "LocationIran.png",
    "Iraq": "LocationIraq.png",
    "Isle of Man": "LocationIsleofMan.png",
    "Italy": "LocationItaly.png",
    "Israel": "LocationIsrael.png",
    "Japan": "LocationJapan.png",
    "Jamaica": "LocationJamaica.png",
    "Jordan": "LocationJordan.png",
    "Kazakhstan": "LocationKazakhstan.png",
    "Kuwait": "LocationKuwait.png",
    "Kyrgyzstan": "LocationKyrgyzstan.png",
    "Laos": "LocationLaos.png",
    "Latvia": "LocationLatvia.png",
    "Lebanon": "LocationLebanon.png",
    "Kenya": "LocationKenya.png",
    "Lesotho": "LocationLesotho.png",
    "Liberia": "LocationLiberia.png",
    "Libya": "LocationLibya.png",
    "Liechtenstein": "LocationLiechtenstein.png",
    "Lithuania": "LocationLithuania.png",
    "Luxembourg": "LocationLuxembourg.png",
    "Madagascar": "LocationMadagascar.png",
    "Malawi": "LocationMalawi.png",
    "Maldives": "LocationMaldives.png",
    "Malaysia": "LocationMalaysia.png",
    "Mali": "LocationMali.png",
    "Malta": "LocationMalta.png",
    "Mauritania": "LocationMauritania.png",
    "Mexico": "LocationMexico.png",
    "Mauritius": "LocationMauritius.png",
    "Moldova": "LocationMoldova.png",
    "Monaco": "LocationMonaco.png",
    "Montenegro": "LocationMontenegro.png",
    "Morocco": "LocationMorocco.png",
    "Mongolia": "LocationMongolia.png",
    "Mozambique": "LocationMozambique.png",
    "Myanmar": "LocationMyanmar.png",
    "Nepal": "LocationNepal.png",
    "Namibia": "LocationNamibia.png",
    "Netherlands": "LocationNetherlands.png",
    "New Zealand": "LocationNewZealand.png",
    "Nicaragua": "LocationNicaragua.png",
    "Nigeria": "LocationNigeria.png",
    "North Korea": "LocationNorthKorea.png",
    "Niger": "LocationNiger.png",
    "Norway": "LocationNorway.png",
    "Oman": "LocationOman.png",
    "Pakistan": "LocationPakistan.png",
    "Palestine": "LocationPalestine.png",
    "Papua New Guinea": "LocationPapuaNewGuinea.png",
    "Panama": "LocationPanama.png",
    "Paraguay": "LocationParaguay.png",
    "Peru": "LocationPeru.png",
    "Philippines": "LocationPhilippines.png",
    "Portugal": "LocationPortugal.png",
    "Poland": "LocationPoland.png",
    "Qatar": "LocationQatar.png",
    "Romania": "LocationRomania.png",
    "Russia": "LocationRussia.png",
    "Rwanda": "LocationRwanda.png",
    "Saint Lucia": "LocationSaintLucia.png",
    "San Marino": "LocationSanMarino.png",
    "Saudi Arabia": "LocationSaudiArabia.png",
    "Seychelles": "LocationSeychelles.png",
    "Serbia": "LocationSerbia.png",
    "Sierra Leone": "LocationSierraLeone.png",
    "Senegal": "LocationSenegal.png",
    "Singapore": "LocationSingapore.png",
    "Slovakia": "LocationSlovakia.png",
    "Slovenia": "LocationSlovenia.png",
    "Somalia": "LocationSomalia.png",
    "Solomon Islands": "LocationSolomonIslands.png",
    "South Africa": "LocationSouthAfrica.png",
    "South Korea": "LocationSouthKorea.png",
    "Spain": "LocationSpain.png",
    "Sri Lanka": "LocationSriLanka.png",
    "Suriname": "LocationSuriname.png",
    "Sudan": "LocationSudan.png",
    "Swaziland": "LocationSwaziland.png",
    "Sweden": "LocationSweden.png",
    "Switzerland": "LocationSwitzerland.png",
    "Syria": "LocationSyria.png",
    "Taiwan": "LocationTaiwan.png",
    "Tajikistan": "LocationTajikistan.png",
    "Tanzania": "LocationTanzania.png",
    "Thailand": "LocationThailand.png",
    "Togo": "LocationTogo.png",
    "Tonga": "LocationTonga.png",
    "Tunisia": "LocationTunisia.png",
    "Turkmenistan": "LocationTurkmenistan.png",
    "Turkey": "LocationTurkey.png",
    "Uganda": "LocationUganda.png",
    "Ukraine": "LocationUkraine.png",
    "United Arab Emirates": "LocationUnitedArabEmirates.png",
    "United Kingdom": "LocationUnitedKingdom.png",
    "Uruguay": "LocationUruguay.png",
    "Uzbekistan": "LocationUzbekistan.png",
    "Vatican City": "LocationVaticanCity.png",
    "Venezuela": "LocationVenezuela.png",
    "Vietnam": "LocationVietnam.png",
    "Yemen": "LocationYemen.png",
    "Zambia": "LocationZambia.png",
    "Western Sahara": "LocationWesternSahara.png",
    "Zimbabwe": "LocationZimbabwe.png"
};

// Takes the source of a wikipage and extracts the first image name, without
// the leading namespace.
//
// If the page embeds no image (png/svg/jpg/jpeg/gif via [[Image:...]] or
// [[File:...]]), falls back to a generic image from Bawolff.leadGen.imgMap,
// trying parameterless templates first and explicit categories second.
//
//   page    - wikitext of the article
//   returns - image file name, or "" when nothing suitable is found
Bawolff.leadGen.extractImg = function (page) {
    var imgRegex = /\[\[(?:[iI][mM][aA][gG][eE]\:|[fF][Ii][lL][eE]\:)((?:[^\|\]])*?\.[pPsSjJgG][nNvVpPiI][gGeEfF][gG]?)[\|\]]?/;
    var img = imgRegex.exec(page);
    if (img && img.length >= 2) {
        return img[1];
    }

    //note doesn't match infoboxes w/params
    var infoboxRegex = /\{\{([^|}]*)\}\}/g;
    // NOTE(review): this literal was garbled in the published copy of the
    // script; reconstructed to match [[Category:Name]] / [[Category:Name|key]].
    var categoryRegex = /\[\[[cC]ategory\:([^\|\]]*)(?:\|[^\]]*)?\]\]/g;

    var map = Bawolff.leadGen.imgMap;

    // Returns the mapped image for the first name captured by `regex` that has
    // an entry in imgMap, or "" if none does.
    var firstMapped = function (regex) {
        var found, name;
        //js seems to reuse these objects from prev calls without reseting lastIndex. reset lastIndex.
        regex.lastIndex = 0;
        while ((found = regex.exec(page))) {
            // Tolerate "{{ Canada }}"-style padding around the name.
            name = found[1].replace(/^\s+|\s+$/g, '');
            // imgMap keys start with a capital letter. toUpperCase must be
            // *called*; the bare method reference made every lookup fail.
            name = name.charAt(0).toUpperCase() + name.substring(1);
            // Own-property check so names such as "__proto__" cannot hit
            // something inherited from Object.prototype.
            if (Object.prototype.hasOwnProperty.call(map, name) && map[name]) {
                return map[name];
            }
        }
        return "";
    };

    // A default of "Wikinews-logo.png" was considered at one point; the
    // function deliberately returns "" when nothing matches.
    return firstMapped(infoboxRegex) || firstMapped(categoryRegex);
};

// Classifies the article for the lead template's "type" field.
//
//   page    - wikitext of the article
//   returns - "breaking", "exclusive", "original" or "none"
Bawolff.leadGen.extractType = function (page) {
    //valid types are: breaking, special, original, exclusive, urgent or none.
    //this currently does not detect special or urgent.
    if (page.match(/\{\{[bB]reaking(?: [nN]ews)?\}\}/)) {
        return "breaking";
    }
    if (page.match(/\{\{[iI]nterview(?:\|[^}]*)?\}\}/i)) {
        return "exclusive";
    }
    if (page.match(/\{\{[oO]riginal(?: reporting)?(?:\|[^}]*)?\}\}/i)) {
        return "original";
    }
    //default
    return "none";
};

// Produces a plain-text synopsis from the beginning of an article.
//
// Wiki markup (templates, images, internal and external links) is stripped
// first, then the leading portion selected by `method` is returned.
//
//   pageText - wikitext of the article
//   method   - a number (must be a number, no type conversion performed):
//                0: 1st sentence (also used for any unrecognised value)
//                1: 1st two sentences
//                2: 1st paragraph
//                3: 1st 250 characters (+ a couple so we don't end in the
//                   middle of a word), or paragraph; "..." is appended
//                4: 1st 500 characters-ish, or paragraph; "..." is appended
//   returns  - the extracted text. If the selected pattern matches nothing
//              (e.g. a one-sentence stub with no trailing whitespace), the
//              first non-blank line is returned instead of throwing.
//   throws   - Error when pageText is a redirect (after alerting the user).
Bawolff.leadGen.takeIntro = function (pageText, method) {
    //first test for redirects.
    var isRedirect = pageText.match(/^#redirect\s?\[\[([^\]]*)\]\]/i);
    if (isRedirect) {
        alert("It appears you are trying to use make lead on a redirect page. Please use the real page name instead (" + isRedirect[1] + ").");
        throw new Error("Page is redirect. please manually resolve to: " + isRedirect[1] );
    }

    //doesn't handle links that make [1].
    //intentionally doesn't strip ' chars (bold or italic)
    //as this often marks things with periods (E. coli)

    // {{w|Foo}} -> Foo
    var fixWLink = /\{\{[wW]\|([^\}]+)\}\}/g;
    // Templates, allowing up to three further levels of nesting.
    var stripTemplates = /\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*\}\})?\}\})?\}\})?\}\}/g;
    //the img regex, looks for a start of the image, then checks for nested internal links, external links, and for ending ]] in caption
    //templates should already be stripped at this point.
    // NOTE(review): the img, pipedLink and extLink literals were garbled in
    // the published copy of this script; they are reconstructed here from
    // their evident intent - verify against the original page history.
    var img = /\[\[[IifF][mMIi][aALl][GgEe][eE]?\:(?:\[\[(?:[^\]]*)\]\]|[^\]]|\](?!\]))*\]\]/g;
    // [[target|label]] -> label
    var pipedLink = /\[\[[^\]\|]*\|([^\]\|]*)\]\]/g;
    // [[target]] -> target
    var normLink = /\[\[([^\]\|]*)\]\]/g;
    // [http://example.org label] -> label
    var extLink = /\[(?:http|ftp|gopher|irc|https)\:[^\]\s]*\s?([^\]]*)\]/g;

    // Order matters: templates go before images (the image pattern assumes
    // they are gone), and piped links go before plain links.
    pageText = pageText.replace(fixWLink, '$1');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(img, '');
    pageText = pageText.replace(pipedLink, '$1');
    pageText = pageText.replace(normLink, '$1');
    pageText = pageText.replace(extLink, '$1');

    // Applies `pattern` to the stripped text and returns the match plus
    // `suffix`. Falls back to the first non-blank line when nothing matches,
    // so a short or oddly punctuated article no longer causes a TypeError.
    var pick = function (pattern, suffix) {
        var found = pageText.match(pattern);
        if (found) {
            return found[0] + suffix;
        }
        return pageText.replace(/^\s+/, '').split('\n')[0];
    };

    switch (method) {
        case 1: //1st 2 sentences or 1st paragraph
            return pick(/[^\n]+?\s*\.(?=\s)(?:[^\n]+?\s[^\s\.]*\.(?=\s)|(?=\s))/, '');
        case 2: //1st paragraph
            return pick(/[^\n]+?(?=\n)/, '');
        case 3: // 1st 250 characters-ish
            return pick(/[^\n]{2,250}.*?\b/, "...");
        case 4: //1st 500 characters-ish
            return pick(/[^\n]{2,500}.*?\b/, "...");
        default: //aka case 0. 1st sentence
            return pick(/[^\n]+?\s[^\s\.]*\.(?=\s)/, '');
    }
};

// Collects everything needed to render a lead from an article's wikitext.
//
//   pageText      - wikitext of the article
//   pageName      - title of the article
//   summaryMethod - numeric summary mode, passed through to takeIntro
//   returns       - object with width, image, title, synopsis, order,
//                   edit_this and type fields
Bawolff.leadGen.extract = function (pageText, pageName, summaryMethod) {
    var gen = Bawolff.leadGen;
    var lead = {};

    // Fields are filled in the same order the helpers were originally called:
    // extractImg, then takeIntro (which may throw on redirects), then
    // extractType.
    lead.width = '100x100px';
    lead.image = gen.extractImg(pageText);
    lead.title = pageName;
    lead.synopsis = gen.takeIntro(pageText, summaryMethod);
    lead.order = 'right';
    //editlink is overriden later.
    lead.edit_this = 'Wikinews:Sandbox';
    lead.type = gen.extractType(pageText);

    return lead;
};

// Renders the {{Lead 2.0}} template wikitext for a lead.
//
//   leadObj  - object as produced by Bawolff.leadGen.extract; its image,
//              width, type, title and synopsis fields are read
//   leadNumb - lead position 1-5; may be a number or a numeric string
//   returns  - the template call as a string
//
// Side effect: for lead 1, leadObj.width is overwritten with "150x150px"
// (kept from the original implementation for backward compatibility).
Bawolff.leadGen.create = function (leadObj, leadNumb) {
    // Coerce before comparing: lead numbers can arrive as strings (see
    // Bawolff.leadGen.leadToPosition), and a strict comparison against the
    // raw value silently skipped the larger top-lead image for "1".
    if (Number(leadNumb) === 1) {
        leadObj.width = "150x150px"; //ugly i know, but whatever.
    }

    // The opening braces are split so this source is not itself parsed as a
    // template call when the script page is rendered as wikitext.
    var res = '{' + '{Lead 2.0';
    res += "\n |id=" + leadNumb + " ";
    res += "\n |image=" + leadObj.image;
    res += "\n |width=" + leadObj.width;
    res += "\n |type=" + leadObj.type;
    res += "\n |title=" + leadObj.title;
    res += "\n |short_title=";
    res += "\n |summary=" + leadObj.synopsis;
    res += "\n}} \n ";

    return res;
};

//calls its argument giving it an object with meta info about the current leads.
//probably want to use Bawolff.leadGen.makeLeadTable instead.

// Fetches the five "Template:Lead article N" pages and hands `callback` an
// object keyed by page title, each value being
//   { timestamp: <parsed revision time>, title: <value of the |title= field> }
//
// Side effect: Bawolff.leadGen.oldestLead is set to the title of the lead
// template whose latest revision is the oldest.
//
// NOTE(review): `exec` is read as a property rather than called; presumably
// mwapilib2 defines it as a getter that starts the request - verify.
Bawolff.leadGen.makeLeadMetaObject = function (callback) {
    // Pulls the |title= parameter out of a lead template's wikitext and trims
    // trailing whitespace. Returns the whole text when no |title= is present.
    var exTitle = function (text) {
        text = text.replace(/[\s\S]*?\|title=([^\|]*)[\s\S]*/, '$1');
        text = text.replace(/\s*$/, '');
        return text;
    };

    // Converts the API's XML response document into the meta object.
    var leadObj = function (doc) {
        var pages = doc.getElementsByTagName('page');
        var obj = {};
        var oldestTime = Infinity;

        for (var i = 0; i < pages.length; i++) {
            var pageTitle = pages[i].getAttribute('title');
            var rev = pages[i].getElementsByTagName('rev')[0];
            if (!rev) {
                // Page is missing or has no revision; nothing to record.
                continue;
            }

            // Merge adjacent text nodes so firstChild holds the complete
            // revision text. The original only referenced `normalize`
            // without calling it, which is a no-op.
            rev.normalize();

            var time = Bawolff.mwapi.parseAPIDate(rev.getAttribute('timestamp'));

            //This is really ugly...
            //put the oldest lead in a global variable.
            if (time < oldestTime) { //this compares miliseconds after epoch
                Bawolff.leadGen.oldestLead = pageTitle;
                oldestTime = time;
            }

            obj[pageTitle] = {
                timestamp: time,
                // An empty revision has no text node at all.
                title: exTitle(rev.firstChild ? rev.firstChild.data : '')
            };
        }
        return obj;
    };

    api.makeRequest({
        action: 'query',
        prop: 'revisions', // was listed twice in the original literal
        titles: 'Template:Lead article 1|Template:Lead article 2|Template:Lead article 3|Template:Lead article 4|Template:Lead article 5',
        rvprop: 'timestamp|content'
    }, leadObj).lift(callback).exec;
};

// Title of the lead template with the oldest latest revision; filled in by
// Bawolff.leadGen.makeLeadMetaObject.
Bawolff.leadGen.oldestLead = null;

// Requests the lead meta object and passes an HTML string to `callback`.
//
// NOTE(review): the HTML is currently the single-space string ' ' and the
// meta object is ignored; the table markup may have been lost from this copy
// of the script - confirm against the page history.
Bawolff.leadGen.makeLeadTable = function (callback) {
    function deliver(leadObj) {
        callback(' ');
    }
    Bawolff.leadGen.makeLeadMetaObject(deliver);
};

// Maps a lead number (1-5) to the name of its position on the main page.
//
//   numb    - lead number, as a number or a numeric string
//   returns - "Top", "Middle-left", "Middle-right", "Bottom-left" or
//             "Bottom-right"
//   throws  - Error for anything that does not loosely equal 1-5
Bawolff.leadGen.leadToPosition = function (numb) {
    var positions = ["Top", "Middle-left", "Middle-right", "Bottom-left", "Bottom-right"];

    for (var lead = 1; lead <= positions.length; lead++) {
        //double equal sign intentional to convert from string.
        if (numb == lead) {
            return positions[lead - 1];
        }
    }

    throw new Error("Invalid lead number (" + numb +") passed to Bawolff.leadGen.leadToPosition");
};