0

I wrote a function where I pass a company name to lookup in a 2nd table a set of records, calculate a complicated result, and return the result.

I want to process all companies and add a value to each record with that result.

I am using the following code:

`aa <- mutate(companies,newcol=sum_rounds(companies$company_name))`

But I get the following warning:

Warning message:
In c("Bwom", "Symple", "TravelTriangle", "Ark Biosciences", "Artizan Biosciences", :
longer object length is not a multiple of shorter object length

(each of these is a company name)

The company dataframe gets a new column, but all values are "false" where actually there should be both true and false.

Any advice would be welcome to a newbie.

Function follows:

sum_rounds<-function(co_name) {
    #get records from rounds for the company name passed to the function
        #remove NAs from column roundtype too
    outval<- rounds %>%
    filter(company_name.x==co_name & !is.na(roundtype)) %>%
        #sort by date round is announced
    arrange(announced_on) %>%
    select(roundtype) %>%
        #create a string of all round types in order
    apply(2,paste,collapse="")

    #the values from mixed to "M", venture to "V" and pureangel to "A"
    # now see if it is of the form aaaaa (and #) followed by m or v
    #   in grep: ^ is start of a line and + is for ar least one copy
    # [mv] is either m or v
    # nice summary is here: http://www.endmemo.com/program/R/gsub.php
    #is angel2vc?
    angel2vc<-grepl("^a+[mv]+",outval)
    #return(list("roundcodes"=outval,"angel2vc"=angel2vc))
    return(angel2vc)
}

DPUT from Companies table Follows:

structure(list(company_name = c("Bwom", "Symple", "TravelTriangle", 
"Ark Biosciences", "Artizan Biosciences", "Audiense"), domain = c("b-wom.com", 
"getsymple.com", "traveltriangle.com", "arkbiosciences.com", 
NA, "audiense.com"), country_code = c("ESP", "USA", "USA", "CHN", 
"USA", "GBR"), state_code = c(NA, "CA", "VA", NA, "NC", NA), 
    region = c("Barcelona", "SF Bay Area", "Washington, D.C.", 
    "Shanghai", "Raleigh", "London"), city = c("Barcelona", "San Francisco", 
    "Charlottesville", "Shanghai", "Durham", "London"), status = c("operating", 
    "operating", "operating", "operating", "operating", "operating"
    ), short_description = c("Bwom is a tool that offers a test and personalized exercises for women's intimate health.", 
    "Symple is the cloud platform for all your business payments. Pay, get paid, connect.", 
    "TravelTriangle enables travel enthusiasts to reserve a personalized holiday plan with a local travel agent.", 
    "Ark Biosciences is a biopharmaceutical company that is dedicated to the discovery and development", 
    "Artizan Biosciences", "SaaS developer delivering unique consumer insight and engagement capabilities to many of the world’s biggest brands and agencies."
    ), category_list = c("health care", "cloud computing|machine learning|mobile apps|mobile payments|retail technology", 
    "e-commerce|personalization|tourism|travel", "health care", 
    "biopharma", "analytics|apps|marketing|market research|social crm|social media|social media marketing"
    ), category_group_list = c("health care", "apps|commerce and shopping|data and analytics|financial services|hardware|internet services|mobile|payments|software", 
    "commerce and shopping|travel and tourism", "health care", 
    "biotechnology|health care|science and engineering", "apps|data and analytics|design|information technology|internet services|media and entertainment|sales and marketing|software"
    ), employee_count = c("1 to 10", "11 to 50", "101 to 250", 
    NA, "1 to 10", "51 to 100"), funding_rounds = c(2L, 1L, 4L, 
    2L, 2L, 5L), funding_total_usd = c(1075791, 120000, 19900000, 
    NA, 3e+06, 8013391), founded_on = structure(c(16555, 16770, 
    15156, 16071, NA, 14975), class = "Date"), first_funding_on = structure(c(16526, 
    17204, 15492, 16532, 17091, 15294), class = "Date"), last_funding_on = structure(c(17204, 
    17204, 17204, 17203, 17203, 17203), class = "Date"), closed_on = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), email = c("hello@b-wom.com", "info@getsymple.com", 
    "admin@traveltriangle.com", "info@arkbiosciences.com", NA, 
    "moreinfo@audiense.com"), phone = c(NA, NA, "'+91 98 99 120408", 
    "###############################################################################################################################################################################################################################################################", 
    NA, "###############################################################################################################################################################################################################################################################"
    ), cb_url = c("https://www.crunchbase.com/organization/bwom", 
    "https://www.crunchbase.com/organization/symple-2", "https://www.crunchbase.com/organization/traveltriangle-com", 
    "https://www.crunchbase.com/organization/ark-biosciences", 
    "https://www.crunchbase.com/organization/artizan-biosciences", 
    "https://www.crunchbase.com/organization/socialbro"), twitter_url = c("https://www.twitter.com/hellobwom", 
    NA, "https://www.twitter.com/traveltriangle", NA, NA, "https://www.twitter.com/socialbro"
    ), facebook_url = c("https://www.facebook.com/hellobwom/?fref=ts", 
    NA, "http://www.facebook.com/traveltriangle", NA, NA, "http://www.facebook.com/socialbro"
    ), uuid = c("e6096d58-3454-d982-0dbe-7de9b06cd493", "fd0ab78f-0dc4-1f18-21d1-7ce9ff7a173b", 
    "742043c1-c17a-4526-4ed0-e911e6e9555b", "8e27eb22-ce03-a2af-58ba-53f0f458f49c", 
    "ed07ac9e-1071-fca0-46d9-42035c2da505", "fed333e5-2754-7413-1e3d-5939d70541d2"
    ), isbio = c("other", "other", "other", "other", "bio", "other"
    ), co_type = c("m", "m", "m", "v", "v", "m")), .Names = c("company_name", 
"domain", "country_code", "state_code", "region", "city", "status", 
"short_description", "category_list", "category_group_list", 
"employee_count", "funding_rounds", "funding_total_usd", "founded_on", 
"first_funding_on", "last_funding_on", "closed_on", "email", 
"phone", "cb_url", "twitter_url", "facebook_url", "uuid", "isbio", 
"co_type"), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))
> 
  • 1
    Does your function accept multiple company names at the moment? Or is it expecting the input to just be a single company name? – Dason Jun 20 '17 at 17:46
  • 1
    Welcome to StackOverflow. Please read through the following on how to make a great reproducible example when you're asking a question: https://stackoverflow.com/a/5963610/6203226. It would especially help if you ran `dput(head(companies))` so that we can run code on data exactly like you are. – Steven M. Mortimer Jun 20 '17 at 17:51
  • The function accepts a single company same at present. – Jeff Behrens Jun 20 '17 at 18:37

0 Answers0