{"id":"https://openalex.org/W2273593125","doi":"https://doi.org/10.1109/hipc.2015.15","title":"Optimizing Approximate Weighted Matching on Nvidia Kepler K40","display_name":"Optimizing Approximate Weighted Matching on Nvidia Kepler K40","publication_year":2015,"publication_date":"2015-12-01","ids":{"openalex":"https://openalex.org/W2273593125","doi":"https://doi.org/10.1109/hipc.2015.15","mag":"2273593125"},"language":"en","primary_location":{"id":"doi:10.1109/hipc.2015.15","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hipc.2015.15","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE 22nd International Conference on High Performance Computing (HiPC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/1956/16752","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078612352","display_name":"Md. Naim","orcid":"https://orcid.org/0000-0001-9551-7499"},"institutions":[{"id":"https://openalex.org/I4432739","display_name":"University of Bergen","ror":"https://ror.org/03zga2b32","country_code":"NO","type":"education","lineage":["https://openalex.org/I4432739"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Md. Naim","raw_affiliation_strings":["University of Bergen Bergen, Norway"],"affiliations":[{"raw_affiliation_string":"University of Bergen Bergen, Norway","institution_ids":["https://openalex.org/I4432739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091535574","display_name":"Fredrik Manne","orcid":null},"institutions":[{"id":"https://openalex.org/I4432739","display_name":"University of Bergen","ror":"https://ror.org/03zga2b32","country_code":"NO","type":"education","lineage":["https://openalex.org/I4432739"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Fredrik Manne","raw_affiliation_strings":["University of Bergen Bergen, Norway"],"affiliations":[{"raw_affiliation_string":"University of Bergen Bergen, Norway","institution_ids":["https://openalex.org/I4432739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075175819","display_name":"Mahantesh Halappanavar","orcid":"https://orcid.org/0000-0002-2323-4753"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahantesh Halappanavar","raw_affiliation_strings":["Pacific Northwest National Lab, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Lab, Washington, USA","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041853964","display_name":"Antonino Tumeo","orcid":"https://orcid.org/0000-0001-9452-120X"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Antonino Tumeo","raw_affiliation_strings":["Pacific Northwest National Lab, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Lab, Washington, USA","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087562583","display_name":"Johannes Langguth","orcid":"https://orcid.org/0000-0003-4200-511X"},"institutions":[{"id":"https://openalex.org/I2799829267","display_name":"Simula Research Laboratory","ror":"https://ror.org/00vn06n10","country_code":"NO","type":"facility","lineage":["https://openalex.org/I2799829267"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Johannes Langguth","raw_affiliation_strings":["Simula Research Laboratory Oslo, Norway"],"affiliations":[{"raw_affiliation_string":"Simula Research Laboratory Oslo, Norway","institution_ids":["https://openalex.org/I2799829267"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5078612352"],"corresponding_institution_ids":["https://openalex.org/I4432739"],"apc_list":null,"apc_paid":null,"fwci":0.9955,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.77847604,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"105","last_page":"114"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10720","display_name":"Complexity and Algorithms in Graphs","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8186966180801392},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7166552543640137},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6844687461853027},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5963231921195984},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.589947521686554},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.5858394503593445},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.48921966552734375},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4883382022380829},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.48314163088798523},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4825443625450134},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.4538884460926056},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.43911752104759216},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4363621473312378},{"id":"https://openalex.org/keywords/kepler","display_name":"Kepler","score":0.4271897077560425},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4107632040977478},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33000820875167847},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10826694965362549}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8186966180801392},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7166552543640137},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6844687461853027},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5963231921195984},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.589947521686554},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.5858394503593445},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.48921966552734375},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4883382022380829},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.48314163088798523},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4825443625450134},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.4538884460926056},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.43911752104759216},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4363621473312378},{"id":"https://openalex.org/C207963374","wikidata":"https://www.wikidata.org/wiki/Q47592","display_name":"Kepler","level":3,"score":0.4271897077560425},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4107632040977478},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33000820875167847},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10826694965362549},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C150846664","wikidata":"https://www.wikidata.org/wiki/Q7602306","display_name":"Stars","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/hipc.2015.15","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hipc.2015.15","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE 22nd International Conference on High Performance Computing (HiPC)","raw_type":"proceedings-article"},{"id":"pmh:oai:bora.uib.no:1956/16752","is_oa":true,"landing_page_url":"https://hdl.handle.net/1956/16752","pdf_url":null,"source":{"id":"https://openalex.org/S4306400085","display_name":"Bergen Open Research Archive (BORA) (University of Bergen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4432739","host_organization_name":"University of Bergen","host_organization_lineage":["https://openalex.org/I4432739"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/bookPart"}],"best_oa_location":{"id":"pmh:oai:bora.uib.no:1956/16752","is_oa":true,"landing_page_url":"https://hdl.handle.net/1956/16752","pdf_url":null,"source":{"id":"https://openalex.org/S4306400085","display_name":"Bergen Open Research Archive (BORA) (University of Bergen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4432739","host_organization_name":"University of Bergen","host_organization_lineage":["https://openalex.org/I4432739"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/bookPart"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W118651885","https://openalex.org/W140130170","https://openalex.org/W347697680","https://openalex.org/W1501106692","https://openalex.org/W1855227287","https://openalex.org/W1972222248","https://openalex.org/W1983582642","https://openalex.org/W2017431061","https://openalex.org/W2018658595","https://openalex.org/W2021642912","https://openalex.org/W2035080386","https://openalex.org/W2088687202","https://openalex.org/W2093832225","https://openalex.org/W2093992309","https://openalex.org/W2109473404","https://openalex.org/W2153752762","https://openalex.org/W2225892715","https://openalex.org/W2988480584","https://openalex.org/W4231181278","https://openalex.org/W6604846346"],"related_works":["https://openalex.org/W2983282793","https://openalex.org/W1973046741","https://openalex.org/W2475524688","https://openalex.org/W2739740241","https://openalex.org/W947442053","https://openalex.org/W2085105049","https://openalex.org/W2592417500","https://openalex.org/W1974923383","https://openalex.org/W2526069705","https://openalex.org/W2024016913"],"abstract_inverted_index":{"Matching":[0],"is":[1],"a":[2,79,92,170,181],"fundamental":[3],"graph":[4,199],"problem":[5],"with":[6,85,152],"numerous":[7],"applications":[8,175],"in":[9,165,191],"science":[10],"and":[11,34,121,140,162,188],"engineering.":[12],"While":[13],"algorithms":[14,24,161,200],"for":[15,51,78,117,126,155],"computing":[16],"optimal":[17],"matchings":[18],"are":[19,35],"difficult":[20],"to":[21,37,74,91,115,124,133,137,142,145],"parallelize,":[22],"approximation":[23],"on":[25,58,148,173,201],"the":[26,47,55,68,104,108,156],"other":[27,196],"hand":[28],"generally":[29],"compute":[30,183],"high":[31],"quality":[32],"solutions":[33],"amenable":[36],"parallelization.":[38],"In":[39],"this":[40,166,192],"paper,":[41],"we":[42,101],"present":[43],"efficient":[44],"implementations":[45,163],"of":[46,67,88,98],"current":[48],"best":[49,110],"algorithm":[50,69,112,186],"half-approximate":[52],"weighted":[53],"matching,":[54],"Suitor":[56],"algorithm,":[57],"Nvidia":[59],"Kepler":[60],"K-40":[61],"platform.":[62],"We":[63,82,129],"develop":[64],"four":[65],"variants":[66],"that":[70,103,176],"exploit":[71],"hardware":[72],"features":[73],"address":[75],"key":[76,182],"challenges":[77],"GPU":[80,111,203],"implementation.":[81],"also":[83,130],"experiment":[84],"different":[86],"combinations":[87],"work":[89],"assigned":[90],"warp.":[93],"Using":[94],"an":[95],"exhaustive":[96],"set":[97],"269":[99],"inputs,":[100],"demonstrate":[102,131],"new":[105,160],"implementation":[106],"outperforms":[107],"previous":[109],"by":[113],"10":[114],"100x":[116],"over":[118],"100":[119,123],"instances,":[120],"from":[122],"1000x":[125],"15":[127],"instances.":[128],"up":[132,141],"20x":[134],"speedup":[135],"relative":[136,144],"2":[138],"threads,":[139],"5x":[143],"16":[146,153],"threads":[147],"Intel":[149],"Xeon":[150],"platform":[151],"cores":[154],"same":[157],"algorithm.":[158],"The":[159],"provided":[164,190],"paper":[167,193],"will":[168,194],"have":[169],"direct":[171],"impact":[172],"several":[174],"repeatedly":[177],"use":[178],"matching":[179],"as":[180],"kernel.":[184],"Further,":[185],"designs":[187],"insights":[189],"benefit":[195],"researchers":[197],"implementing":[198],"modern":[202],"architectures.":[204]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2016-06-24T00:00:00"}
