/*******************************************************************************************/
/* FileName: iclink.sas */
/* Date: Sept 25, 2006 */
/* Author: Rabih Moussawi */
/* Description: Create IBES - CRSP Link Table */
/* FUNCTION: - Creates a link table between IBES TICKER and CRSP PERMNO */
/* - Scores links from 0 (best link) to 6 */
/* */
/* INPUT: */
/* - IBES: IDUSM file */
/* - CRSP: STOCKNAMES file */
/* */
/* OUTPUT: ICLINK set stored in home directory */
/* ICLINK has 15,187 unique IBES TICKER - CRSP PERMNO links */
/* ICLINK contains IBES TICKER and the matching CRSP PERMNO and other fields: */
/* - IBES and CRSP Company names */
/* - SCORE variable: lower scores are better and high scores may need further */
/* checking before using them to link CRSP & IBES data. */
/* In computing the score, a CUSIP match is considered better than a */
/* TICKER match. The score also includes a penalty for differences in */
/* company names-- CNAME in IBES and COMNAM in CRSP. The name penalty is */
/* based upon SPEDIS, which is the spelling distance function in SAS. */
/* SPEDIS(cname,comnam)=0 is a perfect score and SPEDIS < 30 is usually good */
/* enough to be considered a name match. */
/* */
/* "SCORE" levels: */
/* - 0: BEST match: using (cusip, cusip dates and company names) */
/* or (exchange ticker, company names and 6-digit cusip) */
/* - 1: Cusips and cusip dates match but company names do not match */
/* - 2: Cusips and company names match but cusip dates do not match */
/* - 3: Cusips match but cusip dates and company names do not match */
/* - 4: Exch tickers and 6-digit cusips match but company names do not match */
/* - 5: Exch tickers and company names match but 6-digit cusips do not match */
/* - 6: Exch tickers match but company names and 6-digit cusips do not match */
/* */
/* ICLINK Example: */
/* TICKER CNAME PERMNO COMNAM SCORE */
/* BAC BANKAMERICA CORPORATION 58827 BANKAMERICA CORP 0 */
/* DELL DELL INC 11081 DELL INC 0 */
/* FFS 1ST FED BCP DEL 75161 FIRST FEDERAL BANCORP DE 3 */
/* IBM INTERNATIONAL BUSINESS MACHINES 12490 INTERNATIONAL BUSINESS MACHS CO 0 */
/* MSFT MICROSOFT CORP 10107 MICROSOFT CORP 0 */
/* */
/*******************************************************************************************/
* Possible IBES ID (names) file to use (as of April 2006);
* Detail History: ID file : 23808 unique US and Canadian company IBES TICKERs;
* Summary History: IDSUM File: 15576 unique US company IBES TICKERs;
* Recommendation Summary Statistics: RECDSUM File 12465 unique US company IBES tickers;
* It seems that the Summary History Identifier file IDSUM is best
because USFIRM dummy is used to designate only US companies;
%let IBES1= IBES.IDSUM;
%let CRSP1= CRSP.STOCKNAMES;
libname home '~'; * Save link table in home directory;
/* Step 1: Link by CUSIP */
/* IBES: Get the list of IBES TICKERS for US firms in IBES */
proc sort data=&IBES1 out=IBES1 (keep=ticker cusip CNAME sdates);
where USFIRM=1 and not(missing(cusip));
by ticker cusip sdates;
run;
/* Create first and last 'start dates' for CUSIP link */
proc sql;
create table IBES2
as select *, min(sdates) as fdate, max(sdates) as ldate
from IBES1
group by ticker, cusip
order by ticker, cusip, sdates;
quit;
/* Label date range variables and keep only most recent company name for CUSIP link */
data IBES2;
set IBES2;
by ticker cusip;
if last.cusip;
label fdate="First Start date of CUSIP record";
label ldate="Last Start date of CUSIP record";
format fdate ldate date9.;
drop sdates;
run;
/* CRSP: Get all PERMNO-NCUSIP combinations */
proc sort data=&CRSP1 out=CRSP1 (keep=PERMNO NCUSIP comnam namedt nameenddt);
where not missing(NCUSIP);
by PERMNO NCUSIP namedt;
run;
/* Arrange effective dates for CUSIP link */
proc sql;
create table CRSP2
as select PERMNO,NCUSIP,comnam,min(namedt)as namedt,max(nameenddt) as nameenddt
from CRSP1
group by PERMNO, NCUSIP
order by PERMNO, NCUSIP, NAMEDT;
quit;
/* Label date range variables and keep only most recent company name */
data CRSP2;
set CRSP2;
by permno ncusip;
if last.ncusip;
label namedt="Start date of CUSIP record";
label nameenddt="End date of CUSIP record";
format namedt nameenddt date9.;
run;
/* Create CUSIP Link Table */
/* CUSIP date ranges are only used in scoring as CUSIPs are not reused for
different companies overtime */
proc sql;
create table LINK1_1
as select *
from IBES2 as a, CRSP2 as b
where a.CUSIP = b.NCUSIP
order by TICKER, PERMNO, ldate;
quit; * 14,591 IBES TICKERs matched to CRSP PERMNOs;
/* Score links using CUSIP date range and company name spelling distance */
/* Idea: date ranges the same cusip was used in CRSP and IBES should intersect */
data LINK1_2;
set LINK1_1;
by TICKER PERMNO;
if last.permno; * Keep link with most recent company name;
name_dist = min(spedis(cname,comnam),spedis(comnam,cname));
if (not ((ldate<namedt) or (fdate>nameenddt))) and name_dist < 30 then SCORE = 0;
else if (not ((ldate<namedt) or (fdate>nameenddt))) then score = 1;
else if name_dist < 30 then SCORE = 2;
else SCORE = 3;
keep TICKER PERMNO cname comnam score;
run;
/* Step 2: Find links for the remaining unmatched cases using Exchange Ticker */
/* Identify remaining unmatched cases */
proc sql;
create table NOMATCH1
as select distinct a.*
from IBES1 (keep=ticker) as a
where a.ticker NOT in (select ticker from LINK1_2)
order by a.ticker;
quit; * 990 IBES TICKERs not matched with CRSP PERMNOs using CUSIP;
/* Add IBES identifying information */
proc sql;
create table NOMATCH2
as select b.ticker, b.CNAME, b.OFTIC, b.sdates, b.cusip
from NOMATCH1 as a, &IBES1 as b
where a.ticker = b.ticker and not (missing(b.OFTIC))
order by ticker, oftic, sdates;
quit; * 4,157 observations;
/* Create first and last 'start dates' for Exchange Tickers */
proc sql;
create table NOMATCH3
as select *, min(sdates) as fdate, max(sdates) as ldate
from NOMATCH2
group by ticker, oftic
order by ticker, oftic, sdates;
quit;
/* Label date range variables and keep only most recent company name */
data NOMATCH3;
set NOMATCH3;
by ticker oftic;
if last.oftic;
label fdate="First Start date of OFTIC record";
label ldate="Last Start date of OFTIC record";
format fdate ldate date9.;
drop sdates;
run;
/* Get entire list of CRSP stocks with Exchange Ticker information */
proc sort data=&CRSP1 out=CRSP1 (keep=ticker comnam permno ncusip namedt nameenddt);
where not missing(ticker);
by permno ticker namedt;
run;
/* Arrange effective dates for link by Exchange Ticker */
proc sql;
create table CRSP2
as select permno,comnam,ticker as crsp_ticker,ncusip,
min(namedt)as namedt,max(nameenddt) as nameenddt
from CRSP1
group by permno, ticker
order by permno, crsp_ticker, namedt;
quit; * CRSP exchange ticker renamed to crsp_ticker to avoid confusion with IBES TICKER;
/* Label date range variables and keep only most recent company name */
data CRSP2;
set CRSP2;
if last.crsp_ticker;
by permno crsp_ticker;
label namedt="Start date of exch. ticker record";
label nameenddt="End date of exch. ticker record";
format namedt nameenddt date9.;
run;
/* Merge remaining unmatched cases using Exchange Ticker */
/* Note: Use ticker date ranges as exchange tickers are reused overtime */
proc sql;
create table LINK2_1
as select a.ticker,a.oftic, b.permno, a.cname, b.comnam, a.cusip, b.ncusip, a.ldate
from NOMATCH3 as a, CRSP2 as b
where a.oftic = b.crsp_ticker and
(ldate>=namedt) and (fdate<=nameenddt)
order by ticker, oftic, ldate;
quit; * 146 new match of 136 IBES TICKERs;
/* Score using company name using 6-digit CUSIP and company name spelling distance */
data LINK2_2;
set LINK2_1;
name_dist = min(spedis(cname,comnam),spedis(comnam,cname));
if substr(cusip,1,6)=substr(ncusip,1,6) and name_dist < 30 then SCORE=0;
else if substr(cusip,1,6)=substr(ncusip,1,6) then score = 4;
else if name_dist < 30 then SCORE = 5;
else SCORE = 6;
run;
/* Some companies may have more than one TICKER-PERMNO link, */
/* so re-sort and keep the case (PERMNO & Company name from CRSP) */
/* that gives the lowest score for each IBES TICKER (first.ticker=1) */
proc sort data=LINK2_2; by ticker score; run;
data LINK2_3;
set LINK2_2;
by ticker score;
if first.ticker;
keep ticker permno cname comnam permno score;
run;
/* Step 3: Add Exchange Ticker links to CUSIP links */
/* Create final link table and save it in home directory */
data home.ICLINK;
set LINK1_2 LINK2_3;
run;
proc sort data=home.ICLINK; by TICKER PERMNO; run;
/* Create Labels for ICLINK dataset and variables */
proc datasets lib=home nolist;
modify ICLINK (label="IBES-CRSP Link Table");
label CNAME = "Company Name in IBES";
label COMNAM= "Company Name in CRSP";
label SCORE= "Link Score: 0(best) - 6";
run;
quit;