{"id":"https://openalex.org/W4390041365","doi":"https://doi.org/10.48550/arxiv.2312.11595","title":"TIP: Text-Driven Image Processing with Semantic and Restoration Instructions","display_name":"TIP: Text-Driven Image Processing with Semantic and Restoration Instructions","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4390041365","doi":"https://doi.org/10.48550/arxiv.2312.11595"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.11595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2312.11595","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003820559","display_name":"Chenyang Qi","orcid":"https://orcid.org/0000-0002-0407-1346"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi, Chenyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015173810","display_name":"Zhengzhong Tu","orcid":"https://orcid.org/0000-0002-7594-2292"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Zhengzhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058759451","display_name":"Keren Ye","orcid":"https://orcid.org/0000-0002-7349-7762"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Keren","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084566603","display_name":"Mauricio Delbracio","orcid":"https://orcid.org/0000-0001-7539-2991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Delbracio, Mauricio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002085979","display_name":"Peyman Milanfar","orcid":"https://orcid.org/0000-0003-1455-7662"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Milanfar, Peyman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100719529","display_name":"Qifeng Chen","orcid":"https://orcid.org/0000-0003-2199-3948"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Qifeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102759044","display_name":"Hossein Talebi","orcid":"https://orcid.org/0000-0002-5962-2563"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Talebi, Hossein","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks in Image Processing","score":0.9969,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks in Image Processing","score":0.9969,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Single Image Super-Resolution Techniques","score":0.9829,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9675,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deblurring","display_name":"Deblurring","score":0.7963681},{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.5956228},{"id":"https://openalex.org/keywords/image-inpainting","display_name":"Image Inpainting","score":0.588098},{"id":"https://openalex.org/keywords/texture-synthesis","display_name":"Texture Synthesis","score":0.567308},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering","score":0.537866},{"id":"https://openalex.org/keywords/image-synthesis","display_name":"Image Synthesis","score":0.531142},{"id":"https://openalex.org/keywords/super-resolution","display_name":"Super-Resolution","score":0.528412}],"concepts":[{"id":"https://openalex.org/C2777693668","wikidata":"https://www.wikidata.org/wiki/Q25053743","display_name":"Deblurring","level":5,"score":0.7963681},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78902674},{"id":"https://openalex.org/C106430172","wikidata":"https://www.wikidata.org/wiki/Q6002272","display_name":"Image restoration","level":4,"score":0.62053066},{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.5956228},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.52913886},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.479016},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4291635},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42659593},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3808361},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.34492683},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32060665},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09046799},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.11595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.11595","pdf_url":"http://arxiv.org/pdf/2312.11595","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2312.11595","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.11595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.56}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W791927757","https://openalex.org/W3207832039","https://openalex.org/W3153582293","https://openalex.org/W3080537281","https://openalex.org/W2905397092","https://openalex.org/W2289746762","https://openalex.org/W2269775642","https://openalex.org/W2182590612","https://openalex.org/W2139384960","https://openalex.org/W2031788393"],"abstract_inverted_index":{"Text-driven":[0,53],"diffusion":[1],"models":[2],"have":[3],"become":[4],"increasingly":[5],"popular":[6],"for":[7,31,121],"various":[8],"image":[9,34,68],"editing":[10],"tasks,":[11,36],"including":[12],"inpainting,":[13],"stylization,":[14],"and":[15,42],"object":[16],"replacement.":[17],"However,":[18],"it":[19],"still":[20],"remains":[21],"an":[22],"open":[23],"research":[24],"problem":[25],"to":[26,65,86,141,162],"adopt":[27],"this":[28,47],"language-vision":[29],"paradigm":[30],"more":[32],"fine-level":[33,108],"processing":[35],"such":[37],"as":[38,61],"denoising,":[39],"super-resolution,":[40],"deblurring,":[41],"compression":[43],"artifact":[44],"removal.":[45],"In":[46,125],"paper,":[48],"we":[49,82,127],"develop":[50],"TIP,":[51],"a":[52,62,129],"Image":[54],"Processing":[55],"framework":[56,105],"that":[57,106,133],"leverages":[58],"natural":[59],"language":[60],"user-friendly":[63],"interface":[64],"control":[66,174],"the":[67,73,88,96,103,115,119,135,143,155,163,166,170,176],"restoration":[69,97,116,149,157,177],"process.":[70],"We":[71],"consider":[72],"capacity":[74],"of":[75,114,159,165,172],"text":[76],"information":[77],"in":[78,95],"two":[79],"dimensions.":[80],"First,":[81],"use":[83],"content-related":[84],"prompts":[85],"enhance":[87],"semantic":[89],"alignment,":[90],"effectively":[91],"alleviating":[92],"identity":[93],"ambiguity":[94],"outcomes.":[98],"Second,":[99],"our":[100],"approach":[101],"is":[102],"first":[104],"supports":[107],"instruction":[109],"through":[110],"language-based":[111],"quantitative":[112],"specification":[113],"strength,":[117],"without":[118],"need":[120],"explicit":[122],"task-specific":[123],"design.":[124],"addition,":[126],"introduce":[128],"novel":[130],"fusion":[131],"mechanism":[132],"augments":[134],"existing":[136],"ControlNet":[137],"architecture":[138],"by":[139],"learning":[140],"rescale":[142],"generative":[144],"prior,":[145],"thereby":[146],"achieving":[147],"better":[148],"fidelity.":[150],"Our":[151],"extensive":[152],"experiments":[153],"demonstrate":[154],"superior":[156],"performance":[158],"TIP":[160],"compared":[161],"state":[164],"arts,":[167],"alongside":[168],"offering":[169],"flexibility":[171],"text-based":[173],"over":[175],"effects.":[178]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4390041365","counts_by_year":[],"updated_date":"2024-11-22T03:32:45.544386","created_date":"2023-12-21"}